diff --git a/Cargo.lock b/Cargo.lock index 0f6368e4497cb..e16648dbef325 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5430,9 +5430,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.39.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9f9fe3d2b7b75cf4f2805e5b9926e8ac47146667b16b86298c4a8bf08cc469" +checksum = "14311e7e9a03114cd4b65eedd54e8fed2945e17f08586ae97ef53bc0669f9581" dependencies = [ "libc", "objc2-core-foundation", diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index a1e14b5245137..d779089eeaa4c 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -10,7 +10,7 @@ use rustc_parse_format as parse; use rustc_session::lint; use rustc_span::{ErrorGuaranteed, InnerSpan, Span, Symbol, sym}; use rustc_target::asm::InlineAsmArch; -use smallvec::smallvec; +use smallvec::{SmallVec, smallvec}; use crate::errors; use crate::util::{ExprToSpannedString, expr_to_spanned_string}; @@ -26,6 +26,24 @@ struct ValidatedAsmArgs { pub options_spans: Vec, } +struct MacGlobalAsm { + item: ast::Item, +} + +impl MacResult for MacGlobalAsm { + fn make_items(self: Box) -> Option; 1]>> { + Some(smallvec![Box::new(self.item)]) + } + + fn make_stmts(self: Box) -> Option> { + Some(smallvec![ast::Stmt { + id: ast::DUMMY_NODE_ID, + span: self.item.span, + kind: ast::StmtKind::Item(Box::new(self.item)), + }]) + } +} + fn parse_args<'a>( ecx: &ExtCtxt<'a>, sp: Span, @@ -650,18 +668,20 @@ pub(super) fn expand_global_asm<'cx>( return ExpandResult::Retry(()); }; match mac { - Ok(inline_asm) => MacEager::items(smallvec![Box::new(ast::Item { - attrs: ast::AttrVec::new(), - id: ast::DUMMY_NODE_ID, - kind: ast::ItemKind::GlobalAsm(Box::new(inline_asm)), - vis: ast::Visibility { - span: sp.shrink_to_lo(), - kind: ast::VisibilityKind::Inherited, + Ok(inline_asm) => Box::new(MacGlobalAsm { + item: ast::Item { + attrs: ast::AttrVec::new(), + id: ast::DUMMY_NODE_ID, + kind: ast::ItemKind::GlobalAsm(Box::new(inline_asm)), + vis: ast::Visibility { + span: sp.shrink_to_lo(), + kind: ast::VisibilityKind::Inherited, + tokens: None, + }, + span: sp, tokens: None, }, - span: sp, - tokens: None, - })]), + }), Err(guar) => DummyResult::any(sp, guar), } } diff --git a/compiler/rustc_feature/src/accepted.rs b/compiler/rustc_feature/src/accepted.rs index 54a935d13c8c1..eaf8eddfff3a5 100644 --- a/compiler/rustc_feature/src/accepted.rs +++ b/compiler/rustc_feature/src/accepted.rs @@ -108,6 +108,8 @@ declare_features! ( (accepted, cfg_target_abi, "1.78.0", Some(80970)), /// Allows `cfg(target_feature = "...")`. (accepted, cfg_target_feature, "1.27.0", Some(29717)), + /// Allows `cfg(target_has_atomic_primitive_alignment = "...")`. + (accepted, cfg_target_has_atomic_equal_alignment, "CURRENT_RUSTC_VERSION", Some(93822)), /// Allows `cfg(target_vendor = "...")`. (accepted, cfg_target_vendor, "1.33.0", Some(29718)), /// Allows implementing `Clone` for closures where possible (RFC 2132). diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs index 58bf12855ad6e..e10b9a77be851 100644 --- a/compiler/rustc_feature/src/builtin_attrs.rs +++ b/compiler/rustc_feature/src/builtin_attrs.rs @@ -20,11 +20,6 @@ const GATED_CFGS: &[GatedCfg] = &[ (sym::ub_checks, sym::cfg_ub_checks, Features::cfg_ub_checks), (sym::contract_checks, sym::cfg_contract_checks, Features::cfg_contract_checks), (sym::target_thread_local, sym::cfg_target_thread_local, Features::cfg_target_thread_local), - ( - sym::target_has_atomic_equal_alignment, - sym::cfg_target_has_atomic_equal_alignment, - Features::cfg_target_has_atomic_equal_alignment, - ), ( sym::target_has_atomic_load_store, sym::cfg_target_has_atomic, diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index 8385c320d377d..fce8fe923a133 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -444,8 +444,6 @@ declare_features! ( (unstable, cfg_target_compact, "1.63.0", Some(96901)), /// Allows `cfg(target_has_atomic_load_store = "...")`. (unstable, cfg_target_has_atomic, "1.60.0", Some(94039)), - /// Allows `cfg(target_has_atomic_equal_alignment = "...")`. - (unstable, cfg_target_has_atomic_equal_alignment, "1.60.0", Some(93822)), /// Allows `cfg(target_object_format = "...")`. (unstable, cfg_target_object_format, "CURRENT_RUSTC_VERSION", Some(152586)), /// Allows `cfg(target_thread_local)`. diff --git a/compiler/rustc_hir_typeck/src/loops.rs b/compiler/rustc_hir_typeck/src/loops.rs index 21b408064fac5..d0009c36f97a4 100644 --- a/compiler/rustc_hir_typeck/src/loops.rs +++ b/compiler/rustc_hir_typeck/src/loops.rs @@ -270,7 +270,13 @@ impl<'hir> Visitor<'hir> for CheckLoopVisitor<'hir> { } } - let sp_lo = e.span.with_lo(e.span.lo() + BytePos("break".len() as u32)); + let sp_lo = if let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(e.span) + && let Some(break_pos) = snippet.find("break") + { + e.span.with_lo(e.span.lo() + BytePos((break_pos + "break".len()) as u32)) + } else { + e.span.with_lo(e.span.lo() + BytePos("break".len() as u32)) + }; let label_sp = match break_destination.label { Some(label) => sp_lo.with_hi(label.ident.span.hi()), None => sp_lo.shrink_to_lo(), diff --git a/compiler/rustc_session/src/config/cfg.rs b/compiler/rustc_session/src/config/cfg.rs index 9c76f4f3db92f..d16ab59a02d9e 100644 --- a/compiler/rustc_session/src/config/cfg.rs +++ b/compiler/rustc_session/src/config/cfg.rs @@ -149,7 +149,7 @@ pub(crate) fn disallow_cfgs(sess: &Session, user_cfgs: &Cfg) { | (sym::target_pointer_width, Some(_)) | (sym::target_vendor, None | Some(_)) | (sym::target_has_atomic, Some(_)) - | (sym::target_has_atomic_equal_alignment, Some(_)) + | (sym::target_has_atomic_primitive_alignment, Some(_)) | (sym::target_has_atomic_load_store, Some(_)) | (sym::target_has_reliable_f16, None | Some(_)) | (sym::target_has_reliable_f16_math, None | Some(_)) @@ -293,7 +293,7 @@ pub(crate) fn default_configuration(sess: &Session) -> Cfg { ins_sym!(sym::target_has_atomic, sym); } if align.bits() == i { - ins_sym!(sym::target_has_atomic_equal_alignment, sym); + ins_sym!(sym::target_has_atomic_primitive_alignment, sym); } ins_sym!(sym::target_has_atomic_load_store, sym); }; @@ -485,13 +485,10 @@ impl CheckCfg { sym::integer(64usize), sym::integer(128usize), ]; - for sym in [ - sym::target_has_atomic, - sym::target_has_atomic_equal_alignment, - sym::target_has_atomic_load_store, - ] { + for sym in [sym::target_has_atomic, sym::target_has_atomic_load_store] { ins!(sym, no_values).extend(atomic_values); } + ins!(sym::target_has_atomic_primitive_alignment, empty_values).extend(atomic_values); ins!(sym::target_thread_local, no_values); diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 69ed7314855f9..3221610cc10b6 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -237,6 +237,7 @@ symbols! { IntoFuture, IntoIterator, IntoIteratorItem, + IoBufReader, IrTyKind, Item, ItemContext, @@ -306,6 +307,7 @@ symbols! { Some, Source, SpanCtxt, + StdinLock, Str, String, Struct, @@ -2023,8 +2025,8 @@ symbols! { target_feature_11, target_feature_inline_always, target_has_atomic, - target_has_atomic_equal_alignment, target_has_atomic_load_store, + target_has_atomic_primitive_alignment, target_has_reliable_f16, target_has_reliable_f16_math, target_has_reliable_f128, diff --git a/library/core/src/array/drain.rs b/library/core/src/array/drain.rs index b2ff54bdfa21c..329b0e18b982b 100644 --- a/library/core/src/array/drain.rs +++ b/library/core/src/array/drain.rs @@ -6,8 +6,7 @@ impl<'l, 'f, T, U, F: FnMut(T) -> U> Drain<'l, 'f, T, F> { /// This function returns a function that lets you index the given array in const. /// As implemented it can optimize better than iterators, and can be constified. /// It acts like a sort of guard (owns the array) and iterator combined, which can be implemented - /// as it is a struct that implements const fn; - /// in that regard it is somewhat similar to an array::Iter implementing `UncheckedIterator`. + /// as it is a struct that implements const fn. /// The only method you're really allowed to call is `next()`, /// anything else is more or less UB, hence this function being unsafe. /// Moved elements will not be dropped. diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index ab10120fe5548..56f188cfa622a 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -11,7 +11,7 @@ use crate::convert::Infallible; use crate::error::Error; use crate::hash::{self, Hash}; use crate::intrinsics::transmute_unchecked; -use crate::iter::{TrustedLen, UncheckedIterator, repeat_n}; +use crate::iter::{TrustedLen, repeat_n}; use crate::marker::Destruct; use crate::mem::{self, ManuallyDrop, MaybeUninit}; use crate::ops::{ @@ -52,7 +52,10 @@ pub use iter::IntoIter; #[must_use = "cloning is often expensive and is not expected to have side effects"] #[stable(feature = "array_repeat", since = "1.91.0")] pub fn repeat(val: T) -> [T; N] { - from_trusted_iterator(repeat_n(val, N)) + let mut iter = repeat_n(val, N); + // SAFETY: Unless a panic occurs, from_fn will call the closure N times, + // and repeat_n's next() will return Some for N times. + from_fn(move |_| unsafe { iter.next().unwrap_unchecked() }) } /// Creates an array where each element is produced by calling `f` with @@ -464,7 +467,15 @@ trait SpecArrayClone: Clone { impl SpecArrayClone for T { #[inline] default fn clone(array: &[T; N]) -> [T; N] { - from_trusted_iterator(array.iter().cloned()) + let mut ptr: *const T = array.as_ptr(); + // SAFETY: Unless a panic occurs, from_fn will call the closure N times, + // so our pointer arithmetic will be in bounds for the N-element array. + // This works even for ZSTs, since in that case, add() is a no-op. + from_fn(move |_| unsafe { + let old = ptr; + ptr = ptr.add(1); + (&*old).clone() + }) } } @@ -877,39 +888,6 @@ impl [T; N] { } } -/// Populate an array from the first `N` elements of `iter` -/// -/// # Panics -/// -/// If the iterator doesn't actually have enough items. -/// -/// By depending on `TrustedLen`, however, we can do that check up-front (where -/// it easily optimizes away) so it doesn't impact the loop that fills the array. -#[inline] -fn from_trusted_iterator(iter: impl UncheckedIterator) -> [T; N] { - try_from_trusted_iterator(iter.map(NeverShortCircuit)).0 -} - -#[inline] -fn try_from_trusted_iterator( - iter: impl UncheckedIterator, -) -> ChangeOutputType -where - R: Try, - R::Residual: Residual<[T; N]>, -{ - assert!(iter.size_hint().0 >= N); - fn next(mut iter: impl UncheckedIterator) -> impl FnMut(usize) -> T { - move |_| { - // SAFETY: We know that `from_fn` will call this at most N times, - // and we checked to ensure that we have at least that many items. - unsafe { iter.next_unchecked() } - } - } - - try_from_fn(next(iter)) -} - /// Version of [`try_from_fn`] using a passed-in slice in order to avoid /// needing to monomorphize for every array length. /// diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index 97831c41dc0fb..c2d6f7b235a53 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -155,7 +155,7 @@ impl Error for FromBytesWithNulError {} /// within the slice. /// /// This error is created by the [`CStr::from_bytes_until_nul`] method. -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] #[stable(feature = "cstr_from_bytes_until_nul", since = "1.69.0")] pub struct FromBytesUntilNulError(()); diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs index d395d0ab58354..309aee53f93c7 100644 --- a/library/core/src/fmt/num.rs +++ b/library/core/src/fmt/num.rs @@ -868,7 +868,7 @@ fn div_rem_1e16(n: u128) -> (u128, u64) { const M_HIGH: u128 = 76624777043294442917917351357515459181; const SH_POST: u8 = 51; - let quot = n.widening_mul(M_HIGH).1 >> SH_POST; + let quot = n.carrying_mul(M_HIGH, 0).1 >> SH_POST; let rem = n - quot * D; (quot, rem as u64) } diff --git a/library/core/src/iter/adapters/cloned.rs b/library/core/src/iter/adapters/cloned.rs index 54d132813e4db..c0b6a4053c825 100644 --- a/library/core/src/iter/adapters/cloned.rs +++ b/library/core/src/iter/adapters/cloned.rs @@ -2,7 +2,7 @@ use core::num::NonZero; use crate::iter::adapters::zip::try_get_unchecked; use crate::iter::adapters::{SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; -use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen, UncheckedIterator}; +use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen}; use crate::ops::Try; /// An iterator that clones the elements of an underlying iterator. @@ -142,19 +142,6 @@ where { } -impl<'a, I, T: 'a> UncheckedIterator for Cloned -where - I: UncheckedIterator, - T: Clone, -{ - unsafe fn next_unchecked(&mut self) -> T { - // SAFETY: `Cloned` is 1:1 with the inner iterator, so if the caller promised - // that there's an element left, the inner iterator has one too. - let item = unsafe { self.it.next_unchecked() }; - item.clone() - } -} - #[stable(feature = "default_iters", since = "1.70.0")] impl Default for Cloned { /// Creates a `Cloned` iterator from the default value of `I` diff --git a/library/core/src/iter/adapters/map.rs b/library/core/src/iter/adapters/map.rs index f768f077aa27e..75f70dcd9b58f 100644 --- a/library/core/src/iter/adapters/map.rs +++ b/library/core/src/iter/adapters/map.rs @@ -1,7 +1,7 @@ use crate::fmt; use crate::iter::adapters::zip::try_get_unchecked; use crate::iter::adapters::{SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; -use crate::iter::{FusedIterator, InPlaceIterable, TrustedFused, TrustedLen, UncheckedIterator}; +use crate::iter::{FusedIterator, InPlaceIterable, TrustedFused, TrustedLen}; use crate::num::NonZero; use crate::ops::Try; @@ -194,19 +194,6 @@ where { } -impl UncheckedIterator for Map -where - I: UncheckedIterator, - F: FnMut(I::Item) -> B, -{ - unsafe fn next_unchecked(&mut self) -> B { - // SAFETY: `Map` is 1:1 with the inner iterator, so if the caller promised - // that there's an element left, the inner iterator has one too. - let item = unsafe { self.iter.next_unchecked() }; - (self.f)(item) - } -} - #[doc(hidden)] #[unstable(feature = "trusted_random_access", issue = "none")] unsafe impl TrustedRandomAccess for Map where I: TrustedRandomAccess {} diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs index c5e199c30821d..4b19c7ffc00f8 100644 --- a/library/core/src/iter/adapters/zip.rs +++ b/library/core/src/iter/adapters/zip.rs @@ -1,8 +1,6 @@ use crate::cmp; use crate::fmt::{self, Debug}; -use crate::iter::{ - FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen, UncheckedIterator, -}; +use crate::iter::{FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen}; use crate::num::NonZero; /// An iterator that iterates two other iterators simultaneously. @@ -456,13 +454,6 @@ where { } -impl UncheckedIterator for Zip -where - A: UncheckedIterator, - B: UncheckedIterator, -{ -} - // Arbitrarily selects the left side of the zip iteration as extractable "source" // it would require negative trait bounds to be able to try both #[unstable(issue = "none", feature = "inplace_iteration")] diff --git a/library/core/src/iter/mod.rs b/library/core/src/iter/mod.rs index d532f1e568071..9ddafd47807f2 100644 --- a/library/core/src/iter/mod.rs +++ b/library/core/src/iter/mod.rs @@ -458,7 +458,6 @@ pub use self::traits::TrustedFused; pub use self::traits::TrustedLen; #[unstable(feature = "trusted_step", issue = "85731")] pub use self::traits::TrustedStep; -pub(crate) use self::traits::UncheckedIterator; #[stable(feature = "rust1", since = "1.0.0")] pub use self::traits::{ DoubleEndedIterator, ExactSizeIterator, Extend, FromIterator, IntoIterator, Product, Sum, diff --git a/library/core/src/iter/sources/repeat_n.rs b/library/core/src/iter/sources/repeat_n.rs index 4cbaf41852142..0d4ced1b0c8a6 100644 --- a/library/core/src/iter/sources/repeat_n.rs +++ b/library/core/src/iter/sources/repeat_n.rs @@ -1,5 +1,5 @@ use crate::fmt; -use crate::iter::{FusedIterator, TrustedLen, UncheckedIterator}; +use crate::iter::{FusedIterator, TrustedLen}; use crate::num::NonZero; use crate::ops::Try; @@ -211,5 +211,3 @@ impl FusedIterator for RepeatN {} #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for RepeatN {} -#[stable(feature = "iter_repeat_n", since = "1.82.0")] -impl UncheckedIterator for RepeatN {} diff --git a/library/core/src/iter/traits/mod.rs b/library/core/src/iter/traits/mod.rs index b330e9ffe21ac..7639704d5799c 100644 --- a/library/core/src/iter/traits/mod.rs +++ b/library/core/src/iter/traits/mod.rs @@ -4,7 +4,6 @@ mod double_ended; mod exact_size; mod iterator; mod marker; -mod unchecked_iterator; #[unstable(issue = "none", feature = "inplace_iteration")] pub use self::marker::InPlaceIterable; @@ -12,7 +11,6 @@ pub use self::marker::InPlaceIterable; pub use self::marker::TrustedFused; #[unstable(feature = "trusted_step", issue = "85731")] pub use self::marker::TrustedStep; -pub(crate) use self::unchecked_iterator::UncheckedIterator; #[stable(feature = "rust1", since = "1.0.0")] pub use self::{ accum::{Product, Sum}, diff --git a/library/core/src/iter/traits/unchecked_iterator.rs b/library/core/src/iter/traits/unchecked_iterator.rs deleted file mode 100644 index ae4bfcad4e68f..0000000000000 --- a/library/core/src/iter/traits/unchecked_iterator.rs +++ /dev/null @@ -1,36 +0,0 @@ -use crate::iter::TrustedLen; - -/// [`TrustedLen`] cannot have methods, so this allows augmenting it. -/// -/// It currently requires `TrustedLen` because it's unclear whether it's -/// reasonably possible to depend on the `size_hint` of anything else. -pub(crate) trait UncheckedIterator: TrustedLen { - /// Gets the next item from a non-empty iterator. - /// - /// Because there's always a value to return, that means it can return - /// the `Item` type directly, without wrapping it in an `Option`. - /// - /// # Safety - /// - /// This can only be called if `size_hint().0 != 0`, guaranteeing that - /// there's at least one item available. - /// - /// Otherwise (aka when `size_hint().1 == Some(0)`), this is UB. - /// - /// # Note to Implementers - /// - /// This has a default implementation using [`Option::unwrap_unchecked`]. - /// That's probably sufficient if your `next` *always* returns `Some`, - /// such as for infinite iterators. In more complicated situations, however, - /// sometimes there can still be `insertvalue`/`assume`/`extractvalue` - /// instructions remaining in the IR from the `Option` handling, at which - /// point you might want to implement this manually instead. - #[unstable(feature = "trusted_len_next_unchecked", issue = "37572")] - #[inline] - unsafe fn next_unchecked(&mut self) -> Self::Item { - let opt = self.next(); - // SAFETY: The caller promised that we're not empty, and - // `Self: TrustedLen` so we can actually trust the `size_hint`. - unsafe { opt.unwrap_unchecked() } - } -} diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 57ce51bb8c0ed..3e18f87e20537 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -61,11 +61,11 @@ target_has_atomic = "32", target_has_atomic = "64", target_has_atomic = "ptr", - target_has_atomic_equal_alignment = "8", - target_has_atomic_equal_alignment = "16", - target_has_atomic_equal_alignment = "32", - target_has_atomic_equal_alignment = "64", - target_has_atomic_equal_alignment = "ptr", + target_has_atomic_primitive_alignment = "8", + target_has_atomic_primitive_alignment = "16", + target_has_atomic_primitive_alignment = "32", + target_has_atomic_primitive_alignment = "64", + target_has_atomic_primitive_alignment = "ptr", target_has_atomic_load_store = "8", target_has_atomic_load_store = "16", target_has_atomic_load_store = "32", @@ -123,7 +123,6 @@ #![feature(auto_traits)] #![feature(cfg_sanitize)] #![feature(cfg_target_has_atomic)] -#![feature(cfg_target_has_atomic_equal_alignment)] #![feature(cfg_ub_checks)] #![feature(const_closures)] #![feature(const_precise_live_drops)] diff --git a/library/core/src/num/imp/diy_float.rs b/library/core/src/num/imp/diy_float.rs index e054e7f3f10a7..439b74b31a78b 100644 --- a/library/core/src/num/imp/diy_float.rs +++ b/library/core/src/num/imp/diy_float.rs @@ -22,7 +22,7 @@ pub struct Fp { impl Fp { /// Returns a correctly rounded product of itself and `other`. pub fn mul(self, other: Self) -> Self { - let (lo, hi) = self.f.widening_mul(other.f); + let (lo, hi) = self.f.carrying_mul(other.f, 0); let f = hi + (lo >> 63) /* round */; let e = self.e + other.e + 64; Self { f, e } diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 58c499efd55ce..4e7be6f065c9b 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -2777,32 +2777,6 @@ macro_rules! int_impl { (a as Self, b) } - /// Calculates the complete product `self * rhs` without the possibility to overflow. - /// - /// This returns the low-order (wrapping) bits and the high-order (overflow) bits - /// of the result as two separate values, in that order. - /// - /// If you also need to add a carry to the wide result, then you want - /// [`Self::carrying_mul`] instead. - /// - /// # Examples - /// - /// Please note that this example is shared among integer types, which is why `i32` is used. - /// - /// ``` - /// #![feature(widening_mul)] - /// assert_eq!(5i32.widening_mul(-2), (4294967286, -1)); - /// assert_eq!(1_000_000_000i32.widening_mul(-10), (2884901888, -3)); - /// ``` - #[unstable(feature = "widening_mul", issue = "152016")] - #[rustc_const_unstable(feature = "widening_mul", issue = "152016")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub const fn widening_mul(self, rhs: Self) -> ($UnsignedT, Self) { - Self::carrying_mul_add(self, rhs, 0, 0) - } - /// Calculates the "full multiplication" `self * rhs + carry` /// without the possibility to overflow. /// @@ -2813,8 +2787,6 @@ macro_rules! int_impl { /// additional amount of overflow. This allows for chaining together multiple /// multiplications to create "big integers" which represent larger values. /// - /// If you don't need the `carry`, then you can use [`Self::widening_mul`] instead. - /// /// # Examples /// /// Please note that this example is shared among integer types, which is why `i32` is used. @@ -2849,8 +2821,7 @@ macro_rules! int_impl { /// additional amount of overflow. This allows for chaining together multiple /// multiplications to create "big integers" which represent larger values. /// - /// If you don't need either `carry`, then you can use [`Self::widening_mul`] instead, - /// and if you only need one `carry`, then you can use [`Self::carrying_mul`] instead. + /// If you only need one `carry`, then you can use [`Self::carrying_mul`] instead. /// /// # Examples /// diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index fdd4831336179..ed4624f7c9494 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -240,6 +240,39 @@ macro_rules! midpoint_impl { }; } +macro_rules! widening_mul_impl { + ($SelfT:ty, $WideT:ty) => { + /// Widening multiplication. Computes `self * rhs`, widening to a larger integer. + /// + /// The returned value is always exact and can never overflow. + /// + /// Note that this method is semantically equivalent to [`carrying_mul`] with a + /// carry of zero, with the latter instead returning a tuple denoting the low and + /// high parts of the result. Consider using it instead if you need + /// interoperability with other big int helper functions, or if this method isn't + /// available for a given type. + /// + /// [`carrying_mul`]: Self::carrying_mul + /// + /// # Examples + /// + /// ``` + /// #![feature(widening_mul)] + /// + #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_mul(0_", stringify!($SelfT), "), 0);")] + #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_mul(", stringify!($SelfT), "::MAX), ", stringify!($SelfT), "::MAX as ", stringify!($WideT), " * ", stringify!($SelfT), "::MAX as ", stringify!($WideT), ");")] + /// ``` + #[unstable(feature = "widening_mul", issue = "152016")] + #[rustc_const_unstable(feature = "widening_mul", issue = "152016")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub const fn widening_mul(self, rhs: Self) -> $WideT { + self as $WideT * rhs as $WideT + } + } +} + macro_rules! widening_carryless_mul_impl { ($SelfT:ty, $WideT:ty) => { /// Performs a widening carry-less multiplication. @@ -360,6 +393,7 @@ impl i8 { bound_condition = "", } midpoint_impl! { i8, i16, signed } + widening_mul_impl! { i8, i16 } } impl i16 { @@ -384,6 +418,7 @@ impl i16 { bound_condition = "", } midpoint_impl! { i16, i32, signed } + widening_mul_impl! { i16, i32 } } impl i32 { @@ -408,6 +443,7 @@ impl i32 { bound_condition = "", } midpoint_impl! { i32, i64, signed } + widening_mul_impl! { i32, i64 } } impl i64 { @@ -432,6 +468,7 @@ impl i64 { bound_condition = "", } midpoint_impl! { i64, signed } + widening_mul_impl! { i64, i128 } } impl i128 { @@ -568,6 +605,7 @@ impl u8 { bound_condition = "", } midpoint_impl! { u8, u16, unsigned } + widening_mul_impl! { u8, u16 } widening_carryless_mul_impl! { u8, u16 } carrying_carryless_mul_impl! { u8, u16 } @@ -1215,6 +1253,7 @@ impl u16 { bound_condition = "", } midpoint_impl! { u16, u32, unsigned } + widening_mul_impl! { u16, u32 } widening_carryless_mul_impl! { u16, u32 } carrying_carryless_mul_impl! { u16, u32 } @@ -1270,6 +1309,7 @@ impl u32 { bound_condition = "", } midpoint_impl! { u32, u64, unsigned } + widening_mul_impl! { u32, u64 } widening_carryless_mul_impl! { u32, u64 } carrying_carryless_mul_impl! { u32, u64 } } @@ -1301,6 +1341,7 @@ impl u64 { bound_condition = "", } midpoint_impl! { u64, u128, unsigned } + widening_mul_impl! { u64, u128 } widening_carryless_mul_impl! { u64, u128 } carrying_carryless_mul_impl! { u64, u128 } } diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index 8dc668584b658..9bf765f1c4ed9 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -710,6 +710,7 @@ macro_rules! uint_impl { /// assert_eq!(n.extract_bits(0b0010_0100), 0b0000_0011); /// assert_eq!(n.extract_bits(0xF0), 0b0000_1011); /// ``` + #[doc(alias = "pext")] #[unstable(feature = "uint_gather_scatter_bits", issue = "149069")] #[must_use = "this returns the result of the operation, \ without modifying the original"] @@ -727,6 +728,7 @@ macro_rules! uint_impl { /// assert_eq!(n.deposit_bits(0b0101_0101), 0b0101_0001); /// assert_eq!(n.deposit_bits(0xF0), 0b1101_0000); /// ``` + #[doc(alias = "pdep")] #[unstable(feature = "uint_gather_scatter_bits", issue = "149069")] #[must_use = "this returns the result of the operation, \ without modifying the original"] @@ -3177,54 +3179,6 @@ macro_rules! uint_impl { (a as Self, b) } - /// Calculates the complete double-width product `self * rhs`. - /// - /// This returns the low-order (wrapping) bits and the high-order (overflow) bits - /// of the result as two separate values, in that order. As such, - /// `a.widening_mul(b).0` produces the same result as `a.wrapping_mul(b)`. - /// - /// If you also need to add a value and carry to the wide result, then you want - /// [`Self::carrying_mul_add`] instead. - /// - /// If you also need to add a carry to the wide result, then you want - /// [`Self::carrying_mul`] instead. - /// - /// If you just want to know *whether* the multiplication overflowed, then you - /// want [`Self::overflowing_mul`] instead. - /// - /// # Examples - /// - /// ``` - /// #![feature(widening_mul)] - #[doc = concat!("assert_eq!(5_", stringify!($SelfT), ".widening_mul(7), (35, 0));")] - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_mul(", stringify!($SelfT), "::MAX), (1, ", stringify!($SelfT), "::MAX - 1));")] - /// ``` - /// - /// Compared to other `*_mul` methods: - /// ``` - /// #![feature(widening_mul)] - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::widening_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), (0, 3));")] - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::overflowing_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), (0, true));")] - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::wrapping_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), 0);")] - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::checked_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), None);")] - /// ``` - /// - /// Please note that this example is shared among integer types, which is why `u32` is used. - /// - /// ``` - /// #![feature(widening_mul)] - /// assert_eq!(5u32.widening_mul(2), (10, 0)); - /// assert_eq!(1_000_000_000u32.widening_mul(10), (1410065408, 2)); - /// ``` - #[unstable(feature = "widening_mul", issue = "152016")] - #[rustc_const_unstable(feature = "widening_mul", issue = "152016")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub const fn widening_mul(self, rhs: Self) -> (Self, Self) { - Self::carrying_mul_add(self, rhs, 0, 0) - } - /// Calculates the "full multiplication" `self * rhs + carry` /// without the possibility to overflow. /// diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index ac096afb38af0..18abdbed5af6f 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -5,9 +5,7 @@ mod macros; use super::{from_raw_parts, from_raw_parts_mut}; use crate::hint::assert_unchecked; -use crate::iter::{ - FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator, -}; +use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::marker::PhantomData; use crate::mem::{self, SizedTypeProperties}; use crate::num::NonZero; diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index 236bdf9d89cae..1c8a4d5ba3c2b 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -238,7 +238,7 @@ macro_rules! iterator { // SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs. unsafe { self.post_inc_start(n); - Some(self.next_unchecked()) + Some(self.post_inc_start(1).$into_ref()) } } @@ -481,16 +481,6 @@ macro_rules! iterator { #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for $name<'_, T> {} - impl<'a, T> UncheckedIterator for $name<'a, T> { - #[inline] - unsafe fn next_unchecked(&mut self) -> $elem { - // SAFETY: The caller promised there's at least one more item. - unsafe { - self.post_inc_start(1).$into_ref() - } - } - } - #[stable(feature = "default_iters", since = "1.70.0")] impl Default for $name<'_, T> { /// Creates an empty slice iterator. diff --git a/library/core/src/sync/atomic.rs b/library/core/src/sync/atomic.rs index 004772267da74..8a9a0b52ffec3 100644 --- a/library/core/src/sync/atomic.rs +++ b/library/core/src/sync/atomic.rs @@ -622,7 +622,7 @@ impl AtomicBool { /// assert_eq!(some_bool, false); /// ``` #[inline] - #[cfg(target_has_atomic_equal_alignment = "8")] + #[cfg(target_has_atomic_primitive_alignment = "8")] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut(v: &mut bool) -> &mut Self { // SAFETY: the mutable reference guarantees unique ownership, and @@ -682,7 +682,7 @@ impl AtomicBool { /// assert_eq!(some_bools, [true; 10]); /// ``` #[inline] - #[cfg(target_has_atomic_equal_alignment = "8")] + #[cfg(target_has_atomic_primitive_alignment = "8")] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut_slice(v: &mut [bool]) -> &mut [Self] { // SAFETY: the mutable reference guarantees unique ownership, and @@ -1593,7 +1593,7 @@ impl AtomicPtr { /// assert_eq!(unsafe { *some_ptr }, 456); /// ``` #[inline] - #[cfg(target_has_atomic_equal_alignment = "ptr")] + #[cfg(target_has_atomic_primitive_alignment = "ptr")] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut(v: &mut *mut T) -> &mut Self { let [] = [(); align_of::>() - align_of::<*mut ()>()]; @@ -1672,7 +1672,7 @@ impl AtomicPtr { /// } /// ``` #[inline] - #[cfg(target_has_atomic_equal_alignment = "ptr")] + #[cfg(target_has_atomic_primitive_alignment = "ptr")] #[unstable(feature = "atomic_from_mut", issue = "76314")] pub fn from_mut_slice(v: &mut [*mut T]) -> &mut [Self] { // SAFETY: @@ -3621,7 +3621,7 @@ macro_rules! atomic_int { #[cfg(target_has_atomic_load_store = "8")] atomic_int! { cfg(target_has_atomic = "8"), - cfg(target_has_atomic_equal_alignment = "8"), + cfg(target_has_atomic_primitive_alignment = "8"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3639,7 +3639,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "8")] atomic_int! { cfg(target_has_atomic = "8"), - cfg(target_has_atomic_equal_alignment = "8"), + cfg(target_has_atomic_primitive_alignment = "8"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3657,7 +3657,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "16")] atomic_int! { cfg(target_has_atomic = "16"), - cfg(target_has_atomic_equal_alignment = "16"), + cfg(target_has_atomic_primitive_alignment = "16"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3675,7 +3675,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "16")] atomic_int! { cfg(target_has_atomic = "16"), - cfg(target_has_atomic_equal_alignment = "16"), + cfg(target_has_atomic_primitive_alignment = "16"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3693,7 +3693,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "32")] atomic_int! { cfg(target_has_atomic = "32"), - cfg(target_has_atomic_equal_alignment = "32"), + cfg(target_has_atomic_primitive_alignment = "32"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3711,7 +3711,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "32")] atomic_int! { cfg(target_has_atomic = "32"), - cfg(target_has_atomic_equal_alignment = "32"), + cfg(target_has_atomic_primitive_alignment = "32"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3729,7 +3729,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "64")] atomic_int! { cfg(target_has_atomic = "64"), - cfg(target_has_atomic_equal_alignment = "64"), + cfg(target_has_atomic_primitive_alignment = "64"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3747,7 +3747,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "64")] atomic_int! { cfg(target_has_atomic = "64"), - cfg(target_has_atomic_equal_alignment = "64"), + cfg(target_has_atomic_primitive_alignment = "64"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), stable(feature = "integer_atomics_stable", since = "1.34.0"), @@ -3765,7 +3765,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "128")] atomic_int! { cfg(target_has_atomic = "128"), - cfg(target_has_atomic_equal_alignment = "128"), + cfg(target_has_atomic_primitive_alignment = "128"), unstable(feature = "integer_atomics", issue = "99069"), unstable(feature = "integer_atomics", issue = "99069"), unstable(feature = "integer_atomics", issue = "99069"), @@ -3783,7 +3783,7 @@ atomic_int! { #[cfg(target_has_atomic_load_store = "128")] atomic_int! { cfg(target_has_atomic = "128"), - cfg(target_has_atomic_equal_alignment = "128"), + cfg(target_has_atomic_primitive_alignment = "128"), unstable(feature = "integer_atomics", issue = "99069"), unstable(feature = "integer_atomics", issue = "99069"), unstable(feature = "integer_atomics", issue = "99069"), @@ -3805,7 +3805,7 @@ macro_rules! atomic_int_ptr_sized { #[cfg(target_pointer_width = $target_pointer_width)] atomic_int! { cfg(target_has_atomic = "ptr"), - cfg(target_has_atomic_equal_alignment = "ptr"), + cfg(target_has_atomic_primitive_alignment = "ptr"), stable(feature = "rust1", since = "1.0.0"), stable(feature = "extended_compare_and_swap", since = "1.10.0"), stable(feature = "atomic_debug", since = "1.3.0"), @@ -3823,7 +3823,7 @@ macro_rules! atomic_int_ptr_sized { #[cfg(target_pointer_width = $target_pointer_width)] atomic_int! { cfg(target_has_atomic = "ptr"), - cfg(target_has_atomic_equal_alignment = "ptr"), + cfg(target_has_atomic_primitive_alignment = "ptr"), stable(feature = "rust1", since = "1.0.0"), stable(feature = "extended_compare_and_swap", since = "1.10.0"), stable(feature = "atomic_debug", since = "1.3.0"), diff --git a/library/core/src/wtf8.rs b/library/core/src/wtf8.rs index a0978c3dafb48..698e17a6b8e6d 100644 --- a/library/core/src/wtf8.rs +++ b/library/core/src/wtf8.rs @@ -454,25 +454,50 @@ impl Wtf8 { #[track_caller] #[inline] pub fn check_utf8_boundary(&self, index: usize) { + let Err(err) = self.try_check_utf8_boundary(index) else { return }; + match err { + Utf8BoundaryError::NotABoundary => { + panic!("byte index {index} is not a codepoint boundary") + } + Utf8BoundaryError::OutOfBounds => panic!("byte index {index} is out of bounds"), + Utf8BoundaryError::BetweenSurrogates => { + panic!("byte index {index} lies between surrogate codepoints") + } + } + } + + #[track_caller] + #[inline] + pub fn try_check_utf8_boundary(&self, index: usize) -> Result<(), Utf8BoundaryError> { if index == 0 { - return; + return Ok(()); } match self.bytes.get(index) { Some(0xED) => (), // Might be a surrogate - Some(&b) if (b as i8) >= -0x40 => return, - Some(_) => panic!("byte index {index} is not a codepoint boundary"), - None if index == self.len() => return, - None => panic!("byte index {index} is out of bounds"), + Some(&b) if (b as i8) >= -0x40 => return Ok(()), + Some(_) => return Err(Utf8BoundaryError::NotABoundary), + None if index == self.len() => return Ok(()), + None => return Err(Utf8BoundaryError::OutOfBounds), } if self.bytes[index + 1] >= 0xA0 { // There's a surrogate after index. Now check before index. if index >= 3 && self.bytes[index - 3] == 0xED && self.bytes[index - 2] >= 0xA0 { - panic!("byte index {index} lies between surrogate codepoints"); + return Err(Utf8BoundaryError::BetweenSurrogates); } } + Ok(()) } } +// This error type is only used temporarily to provide better panic messages +// It does not implement Error. +#[derive(Debug)] +pub enum Utf8BoundaryError { + NotABoundary, + OutOfBounds, + BetweenSurrogates, +} + /// Copied from core::str::raw::slice_unchecked #[inline] unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs index 12b81fea9d27c..18e09c707ebad 100644 --- a/library/coretests/tests/lib.rs +++ b/library/coretests/tests/lib.rs @@ -126,7 +126,6 @@ #![feature(unicode_internals)] #![feature(unsize)] #![feature(unwrap_infallible)] -#![feature(widening_mul)] // tidy-alphabetical-end #![allow(internal_features)] #![deny(fuzzy_provenance_casts)] diff --git a/library/coretests/tests/num/int_macros.rs b/library/coretests/tests/num/int_macros.rs index 2b641f595eba4..1334881aad68d 100644 --- a/library/coretests/tests/num/int_macros.rs +++ b/library/coretests/tests/num/int_macros.rs @@ -433,12 +433,6 @@ macro_rules! int_module { assert_eq_const_safe!(($T, bool): (0 as $T).borrowing_sub(MIN, true), (MAX, false)); } - fn test_widening_mul() { - assert_eq_const_safe!(($U, $T): MAX.widening_mul(MAX), (1, MAX / 2)); - assert_eq_const_safe!(($U, $T): MIN.widening_mul(MAX), (MIN as $U, MIN / 2)); - assert_eq_const_safe!(($U, $T): MIN.widening_mul(MIN), (0, MAX / 2 + 1)); - } - fn test_carrying_mul() { assert_eq_const_safe!(($U, $T): MAX.carrying_mul(MAX, 0), (1, MAX / 2)); assert_eq_const_safe!(($U, $T): diff --git a/library/coretests/tests/num/uint_macros.rs b/library/coretests/tests/num/uint_macros.rs index 8dfa0924bd369..b686df0d14f2d 100644 --- a/library/coretests/tests/num/uint_macros.rs +++ b/library/coretests/tests/num/uint_macros.rs @@ -504,10 +504,6 @@ macro_rules! uint_module { assert_eq_const_safe!(($T, bool): $T::MAX.borrowing_sub($T::MAX, true), ($T::MAX, true)); } - fn test_widening_mul() { - assert_eq_const_safe!(($T, $T): $T::MAX.widening_mul($T::MAX), (1, $T::MAX - 1)); - } - fn test_carrying_mul() { assert_eq_const_safe!(($T, $T): $T::MAX.carrying_mul($T::MAX, 0), (1, $T::MAX - 1)); assert_eq_const_safe!(($T, $T): $T::MAX.carrying_mul($T::MAX, $T::MAX), (0, $T::MAX)); diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs index 09692b0d290d8..a7a9b7901d7f4 100644 --- a/library/std/src/collections/hash/map.rs +++ b/library/std/src/collections/hash/map.rs @@ -493,15 +493,16 @@ impl HashMap { /// ``` /// use std::collections::HashMap; /// - /// let map = HashMap::from([ + /// let map: HashMap<&str, i32> = HashMap::from([ /// ("a", 1), /// ("b", 2), /// ("c", 3), /// ]); /// - /// for key in map.keys() { - /// println!("{key}"); - /// } + /// let mut values: Vec<_> = map.keys().copied().collect(); + /// values.sort(); + /// + /// assert_eq!(values, vec!["a", "b", "c"]); /// ``` /// /// # Performance @@ -555,15 +556,16 @@ impl HashMap { /// ``` /// use std::collections::HashMap; /// - /// let map = HashMap::from([ + /// let map: HashMap<&str, i32> = HashMap::from([ /// ("a", 1), /// ("b", 2), /// ("c", 3), /// ]); /// - /// for val in map.values() { - /// println!("{val}"); - /// } + /// let mut values: Vec<_> = map.values().copied().collect(); + /// values.sort(); + /// + /// assert_eq!(values, vec![1, 2, 3]); /// ``` /// /// # Performance @@ -591,12 +593,12 @@ impl HashMap { /// ]); /// /// for val in map.values_mut() { - /// *val = *val + 10; + /// *val += 10; /// } /// - /// for val in map.values() { - /// println!("{val}"); - /// } + /// assert_eq!(map.get("a"), Some(&11)); + /// assert_eq!(map.get("b"), Some(&12)); + /// assert_eq!(map.get("c"), Some(&13)); /// ``` /// /// # Performance @@ -656,9 +658,13 @@ impl HashMap { /// ("c", 3), /// ]); /// - /// for (key, val) in map.iter() { - /// println!("key: {key} val: {val}"); + /// let mut count = 0; + /// + /// for (_key, _val) in map.iter() { + /// count += 1; /// } + /// + /// assert_eq!(count, 3); /// ``` /// /// # Performance @@ -691,9 +697,9 @@ impl HashMap { /// *val *= 2; /// } /// - /// for (key, val) in &map { - /// println!("key: {key} val: {val}"); - /// } + /// assert_eq!(map.get("a"), Some(&2)); + /// assert_eq!(map.get("b"), Some(&4)); + /// assert_eq!(map.get("c"), Some(&6)); /// ``` /// /// # Performance diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index f0cebb05c76a5..be606ae69d6d9 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -1055,6 +1055,61 @@ impl OsStr { OsString { inner: Buf::from_box(boxed) } } + /// Divides one string slice into two at an index. + /// + /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to the end of the string slice. + /// + /// The argument, `mid`, should be a byte offset from the start of the string. + /// It must also be on a valid `OsStr` boundary. + /// See [`split_at_checked`][Self::split_at_checked] for the definition of a valid boundary. + /// + /// Panics + /// + /// Panics if `mid` is not on a valid boundary, or if it is past the end of the last code point of the string slice. + /// For a non-panicking alternative see [`split_at_checked`][Self::split_at_checked]. + #[unstable(feature = "os_str_split_at", issue = "none")] + pub fn split_at(&self, mid: usize) -> (&OsStr, &OsStr) { + self.inner.check_public_boundary(mid); + + // SAFETY: we've checked it's in bounds and a valid boundary + unsafe { self.split_at_unchecked(mid) } + } + + /// Divides one string slice into two at an index. + /// + /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to the end of the string slice. + /// + /// The argument, `mid`, should be a valid byte offset from the start of the string. + /// It must also be on a valid `OsStr` boundary. + /// The method returns `None` if that’s not the case. + /// A valid `OsStr` boundary is one of: + /// - The start of the string + /// - The end of the string + /// - The start of a valid non-empty UTF-8 substring + /// - Immediately follows a valid non-empty UTF-8 substring + #[unstable(feature = "os_str_split_at", issue = "none")] + pub fn split_at_checked(&self, mid: usize) -> Option<(&OsStr, &OsStr)> { + self.inner.try_check_public_boundary(mid)?; + + // SAFETY: we've checked it's in bounds and a valid boundary + unsafe { Some(self.split_at_unchecked(mid)) } + } + + /// Splits an `OsStr` without checking if `mid` is a valid boundary. + /// You should use `split_at` or `split_at_checked` instead. + /// + /// # Safety + /// + /// Any caller must ensure `mid` is within bounds and lies on + /// a valid `OsStr` boundary for the platform. + unsafe fn split_at_unchecked(&self, mid: usize) -> (&OsStr, &OsStr) { + // SAFETY: it's up to the caller to ensure this is safe. + unsafe { + let (first, second) = self.as_encoded_bytes().split_at_unchecked(mid); + (Self::from_encoded_bytes_unchecked(first), Self::from_encoded_bytes_unchecked(second)) + } + } + /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS /// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function. /// diff --git a/library/std/src/ffi/os_str/tests.rs b/library/std/src/ffi/os_str/tests.rs index 3474f0ab50684..1075c43e0b3bf 100644 --- a/library/std/src/ffi/os_str/tests.rs +++ b/library/std/src/ffi/os_str/tests.rs @@ -289,6 +289,108 @@ fn slice_surrogate_edge() { assert_eq!(post_crab.slice_encoded_bytes(4..), surrogate); } +#[test] +fn os_str_slice_at() { + #[track_caller] + fn slice_at_ok(input: &OsStr, index: usize, expected: (&str, &str)) { + let expected = (OsStr::new(expected.0), OsStr::new(expected.1)); + assert_eq!(input.split_at(index), expected); + assert_eq!(input.split_at_checked(index), Some(expected)); + } + + let os_str = OsStr::new("123αƒ’πŸ¦€4"); + slice_at_ok(os_str, 0, ("", "123αƒ’πŸ¦€4")); + slice_at_ok(os_str, 1, ("1", "23αƒ’πŸ¦€4")); + slice_at_ok(os_str, 2, ("12", "3αƒ’πŸ¦€4")); + slice_at_ok(os_str, 3, ("123", "αƒ’πŸ¦€4")); + slice_at_ok(os_str, 6, ("123αƒ’", "πŸ¦€4")); + slice_at_ok(os_str, 10, ("123αƒ’πŸ¦€", "4")); + slice_at_ok(os_str, 11, ("123αƒ’πŸ¦€4", "")); + + // Invalid boundaries should fail. + assert!(os_str.split_at_checked(4).is_none()); + assert!(os_str.split_at_checked(5).is_none()); + assert!(os_str.split_at_checked(7).is_none()); + assert!(os_str.split_at_checked(8).is_none()); + assert!(os_str.split_at_checked(9).is_none()); + // Out of bounds + assert!(os_str.split_at_checked(12).is_none()); +} + +#[test] +#[should_panic] +fn os_str_slice_at_out_of_bounds() { + let crab = OsStr::new("πŸ¦€"); + let _ = crab.split_at(5); +} + +#[test] +#[should_panic] +fn os_str_slice_at_mid_char() { + let crab = OsStr::new("πŸ¦€"); + let _ = crab.split_at(2); +} + +#[cfg(unix)] +#[test] +fn os_str_slice_at_unix() { + use crate::os::unix::ffi::OsStrExt; + + let broken_utf8 = OsStr::from_bytes(&"πŸ¦€".as_bytes()[..3]); + let invalid = OsStr::from_bytes(b"\xFF"); + + // Check that broken UTF-8 isn't treated as if it's valid. + let mut os_string = invalid.to_os_string(); + os_string.push(broken_utf8); + assert_eq!(os_string.split_at_checked(1), None); + + // We should be able to split on ascii with invalid UTF-8 between + let os_string = OsStr::from_bytes(b"a\xFFa"); + assert_eq!(os_string.split_at_checked(1), Some(("a".as_ref(), OsStr::from_bytes(b"\xFFa")))); + assert_eq!(os_string.split_at_checked(2), Some((OsStr::from_bytes(b"a\xFF"), "a".as_ref(),))); + + let os_string = OsStr::from_bytes(&"abcπŸ¦€".as_bytes()[..6]); + assert_eq!( + os_string.split_at_checked(3), + Some(("abc".as_ref(), OsStr::from_bytes(b"\xF0\x9F\xA6"))) + ); + + let mut os_string = invalid.to_os_string(); + os_string.push("πŸ¦€"); + assert_eq!(os_string.split_at_checked(1), Some((invalid, "πŸ¦€".as_ref()))); +} + +#[test] +#[cfg(windows)] +fn os_str_slice_at_windows() { + use crate::os::windows::ffi::OsStringExt; + + // slicing between unpaired surrogates should not be possible + // checking is implemented as a loop so we're agnostic towards + // the internal encoding + let os_string = OsString::from_wide(&[0xD800, 0xD800]); + for i in 1..os_string.len() { + assert_eq!(os_string.split_at_checked(i), None); + } + // For completeness, check that splitting at the start and end still works. + assert!(os_string.split_at_checked(0).is_some()); + assert!(os_string.split_at_checked(os_string.len()).is_some()); + + // check that slicing before and after unpaired surrogates work + let surrogate = OsString::from_wide(&[0xD800]); + + let mut os_string = surrogate.clone(); + os_string.push("πŸ¦€"); + assert_eq!( + os_string.split_at_checked(surrogate.len()), + Some((surrogate.as_ref(), "πŸ¦€".as_ref())) + ); + + let mut os_string = OsString::from("πŸ¦€"); + os_string.push(&surrogate); + assert_eq!(os_string.split_at_checked("πŸ¦€".len()), Some(("πŸ¦€".as_ref(), surrogate.as_ref()))); +} + #[test] fn clone_to_uninit() { let a = OsStr::new("hello.txt"); diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs index ac461f4f0307e..36bf329a946e0 100644 --- a/library/std/src/io/buffered/bufreader.rs +++ b/library/std/src/io/buffered/bufreader.rs @@ -48,6 +48,7 @@ use crate::io::{ /// } /// ``` #[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "IoBufReader")] pub struct BufReader { buf: Buffer, inner: R, diff --git a/library/std/src/io/stdio.rs b/library/std/src/io/stdio.rs index 2d80fe49e80a7..845d325e4e5b4 100644 --- a/library/std/src/io/stdio.rs +++ b/library/std/src/io/stdio.rs @@ -285,6 +285,7 @@ pub struct Stdin { /// ``` #[must_use = "if unused stdin will immediately unlock"] #[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "StdinLock")] pub struct StdinLock<'a> { inner: MutexGuard<'a, BufReader>, } diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index 5482663ef0079..a57da01a5d85d 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -238,16 +238,24 @@ impl Slice { #[track_caller] #[inline] pub fn check_public_boundary(&self, index: usize) { + if self.try_check_public_boundary(index).is_none() { + panic!("byte index {index} is not an OsStr boundary"); + } + } + + #[track_caller] + #[inline] + pub fn try_check_public_boundary(&self, index: usize) -> Option<()> { if index == 0 || index == self.inner.len() { - return; + return Some(()); } if index < self.inner.len() && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) { - return; + return Some(()); } - slow_path(&self.inner, index); + return slow_path(&self.inner, index); /// We're betting that typical splits will involve an ASCII character. /// @@ -255,26 +263,26 @@ impl Slice { /// better assembly. #[track_caller] #[inline(never)] - fn slow_path(bytes: &[u8], index: usize) { - let (before, after) = bytes.split_at(index); + fn slow_path(bytes: &[u8], index: usize) -> Option<()> { + let (before, after) = bytes.split_at_checked(index)?; // UTF-8 takes at most 4 bytes per codepoint, so we don't // need to check more than that. let after = after.get(..4).unwrap_or(after); match str::from_utf8(after) { - Ok(_) => return, - Err(err) if err.valid_up_to() != 0 => return, + Ok(_) => return Some(()), + Err(err) if err.valid_up_to() != 0 => return Some(()), Err(_) => (), } for len in 2..=4.min(index) { let before = &before[index - len..]; if str::from_utf8(before).is_ok() { - return; + return Some(()); } } - panic!("byte index {index} is not an OsStr boundary"); + None } } diff --git a/library/std/src/sys/os_str/utf8.rs b/library/std/src/sys/os_str/utf8.rs index a324a478325e6..289f58aa480f7 100644 --- a/library/std/src/sys/os_str/utf8.rs +++ b/library/std/src/sys/os_str/utf8.rs @@ -224,10 +224,16 @@ impl Slice { Slice::from_str(unsafe { str::from_utf8_unchecked(s) }) } + #[track_caller] + #[inline] + pub fn try_check_public_boundary(&self, index: usize) -> Option<()> { + if self.inner.is_char_boundary(index) { Some(()) } else { None } + } + #[track_caller] #[inline] pub fn check_public_boundary(&self, index: usize) { - if !self.inner.is_char_boundary(index) { + if self.try_check_public_boundary(index).is_none() { panic!("byte index {index} is not an OsStr boundary"); } } diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index 1f130d91cf393..9a32ab3f3ea12 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -240,6 +240,11 @@ impl Slice { unsafe { mem::transmute(Wtf8::from_bytes_unchecked(s)) } } + #[inline] + pub fn try_check_public_boundary(&self, index: usize) -> Option<()> { + self.inner.try_check_utf8_boundary(index).ok() + } + #[track_caller] #[inline] pub fn check_public_boundary(&self, index: usize) { diff --git a/library/stdarch/.github/workflows/main.yml b/library/stdarch/.github/workflows/main.yml index b5dbefef4eea3..5c84e856d54bb 100644 --- a/library/stdarch/.github/workflows/main.yml +++ b/library/stdarch/.github/workflows/main.yml @@ -279,7 +279,6 @@ jobs: - aarch64-unknown-linux-gnu - aarch64_be-unknown-linux-gnu - armv7-unknown-linux-gnueabihf - - arm-unknown-linux-gnueabihf - x86_64-unknown-linux-gnu profile: [dev, release] include: diff --git a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 2768c521ebccc..e2b3d95585efe 100644 --- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -1,17 +1,21 @@ FROM ubuntu:25.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ gcc-aarch64-linux-gnu \ - g++-aarch64-linux-gnu \ libc6-dev-arm64-cross \ qemu-user \ make \ file \ - clang \ - lld + xz-utils \ + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ diff --git a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index f85c6a2592e99..d7c12493ad9cf 100644 --- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -2,17 +2,15 @@ FROM ubuntu:25.10 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ libc6-dev-arm64-cross \ qemu-user \ make \ file \ - clang \ curl \ xz-utils \ - lld + wget ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu" @@ -21,6 +19,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin RUN tar -xvf "${TOOLCHAIN}.tar.xz" RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" diff --git a/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile index 6d4ff24828672..23e4d5a341152 100644 --- a/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +++ b/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile @@ -7,7 +7,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libc6-dev-armhf-cross \ qemu-user \ make \ - file + file \ + clang \ + lld ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 602249c0ece5a..02744917af6df 100644 --- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -1,17 +1,21 @@ FROM ubuntu:24.04 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ - g++ \ ca-certificates \ libc6-dev \ gcc-arm-linux-gnueabihf \ - g++-arm-linux-gnueabihf \ libc6-dev-armhf-cross \ qemu-user \ make \ file \ - clang \ - lld + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 17c6d25215aeb..17d1ac67e714f 100644 --- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -6,15 +6,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ ca-certificates \ wget \ - xz-utils \ - clang \ - libstdc++-14-dev \ - build-essential \ - lld + xz-utils RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz +RUN mkdir llvm +RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ -rtm-mode full -tsx --" diff --git a/library/stdarch/ci/intrinsic-test-docker.sh b/library/stdarch/ci/intrinsic-test-docker.sh index beeff42c76212..948b53dc67bc9 100755 --- a/library/stdarch/ci/intrinsic-test-docker.sh +++ b/library/stdarch/ci/intrinsic-test-docker.sh @@ -48,7 +48,7 @@ run() { --workdir /checkout \ --privileged \ stdarch \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh ${1}" + sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh" } if [ -z "$1" ]; then diff --git a/library/stdarch/ci/intrinsic-test.sh b/library/stdarch/ci/intrinsic-test.sh index 89104e2672ad9..1f3a2caf50654 100755 --- a/library/stdarch/ci/intrinsic-test.sh +++ b/library/stdarch/ci/intrinsic-test.sh @@ -5,127 +5,56 @@ set -ex : "${TARGET?The TARGET environment variable must be set.}" export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" -export HOST_RUSTFLAGS="${RUSTFLAGS}" export PROFILE="${PROFILE:="release"}" -case ${TARGET} in - # On 32-bit use a static relocation model which avoids some extra - # instructions when dealing with static data, notably allowing some - # instruction assertion checks to pass below the 20 instruction limit. If - # this is the default, dynamic, then too many instructions are generated - # when we assert the instruction for a function and it causes tests to fail. - i686-* | i586-*) - export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static" - ;; - # Some x86_64 targets enable by default more features beyond SSE2, - # which cause some instruction assertion checks to fail. - x86_64-*) - export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3" - ;; - #Unoptimized build uses fast-isel which breaks with msa - mips-* | mipsel-*) - export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false" - ;; - armv7-*eabihf | thumbv7-*eabihf) - export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon" - ;; - # Some of our test dependencies use the deprecated `gcc` crates which - # doesn't detect RISC-V compilers automatically, so do it manually here. - riscv*) - export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc" - ;; -esac - echo "RUSTFLAGS=${RUSTFLAGS}" -echo "OBJDUMP=${OBJDUMP}" echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -# Test targets compiled with extra features. +export CC="clang" + case ${TARGET} in - # Setup aarch64 & armv7 specific variables, the runner, along with some - # tests to skip - aarch64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" + aarch64_be*) + export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt ;; - aarch64_be-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" + aarch64*) + export CFLAGS="-I/usr/aarch64-linux-gnu/include/" + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt ;; - armv7-unknown-linux-gnueabihf*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/" + armv7*) + export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" ;; - x86_64-unknown-linux-gnu*) - TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" - TEST_CXX_COMPILER="clang++" - TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" + x86_64*) + export CFLAGS="-I/usr/include/x86_64-linux-gnu/" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}" ;; *) ;; esac -# Arm specific case "${TARGET}" in - aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ - cargo run "${INTRINSIC_TEST}" --release \ - --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --runner "${TEST_RUNNER}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" - ;; - - aarch64_be-unknown-linux-gnu*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \ - cargo run "${INTRINSIC_TEST}" --release \ - --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --runner "${TEST_RUNNER}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \ - --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" - ;; - x86_64-unknown-linux-gnu*) - # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER` is not necessary for `intrinsic-test` - # because the binary needs to run directly on the host. - # Hence the use of `env -u`. env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \ - RUST_LOG=warn RUST_BACKTRACE=1 \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ - --runner "${TEST_RUNNER}" \ --skip "${TEST_SKIP_INTRINSICS}" \ - --cppcompiler "${TEST_CXX_COMPILER}" \ - --target "${TARGET}" \ - --profile "${PROFILE}" \ - --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" + --target "${TARGET}" + + echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" ;; - *) + *) + cargo run "${INTRINSIC_TEST}" --release \ + --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ + --skip "${TEST_SKIP_INTRINSICS}" \ + --target "${TARGET}" ;; esac + +cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 898ccfc9edeae..8d701d9b88056 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -65,12 +65,15 @@ pub fn __jcvt(a: f64) -> i32 { #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(sabal2) +)] pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { + let d = vget_high_s8(b); + let e = vget_high_s8(c); + let f = vabd_s8(d, e); unsafe { - let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: int8x8_t = vabd_s8(d, e); let f: uint8x8_t = simd_cast(f); simd_add(a, simd_cast(f)) } @@ -80,12 +83,15 @@ pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(sabal2) +)] pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + let d = vget_high_s16(b); + let e = vget_high_s16(c); + let f = vabd_s16(d, e); unsafe { - let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let e: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - let f: int16x4_t = vabd_s16(d, e); let f: uint16x4_t = simd_cast(f); simd_add(a, simd_cast(f)) } @@ -95,12 +101,15 @@ pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(sabal2) +)] pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + let d = vget_high_s32(b); + let e = vget_high_s32(c); + let f = vabd_s32(d, e); unsafe { - let d: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let e: int32x2_t = simd_shuffle!(c, c, [2, 3]); - let f: int32x2_t = vabd_s32(d, e); let f: uint32x2_t = simd_cast(f); simd_add(a, simd_cast(f)) } @@ -110,42 +119,45 @@ pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uabal2) +)] pub fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - unsafe { - let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: uint8x8_t = vabd_u8(d, e); - simd_add(a, simd_cast(f)) - } + let d = vget_high_u8(b); + let e = vget_high_u8(c); + let f = vabd_u8(d, e); + unsafe { simd_add(a, simd_cast(f)) } } #[doc = "Unsigned Absolute difference and Accumulate Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uabal2) +)] pub fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - unsafe { - let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let e: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - let f: uint16x4_t = vabd_u16(d, e); - simd_add(a, simd_cast(f)) - } + let d = vget_high_u16(b); + let e = vget_high_u16(c); + let f = vabd_u16(d, e); + unsafe { simd_add(a, simd_cast(f)) } } #[doc = "Unsigned Absolute difference and Accumulate Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uabal2) +)] pub fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - unsafe { - let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let e: uint32x2_t = simd_shuffle!(c, c, [2, 3]); - let f: uint32x2_t = vabd_u32(d, e); - simd_add(a, simd_cast(f)) - } + let d = vget_high_u32(b); + let e = vget_high_u32(c); + let f = vabd_u32(d, e); + unsafe { simd_add(a, simd_cast(f)) } } #[doc = "Absolute difference between the arguments of Floating"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f64)"] @@ -186,7 +198,7 @@ pub fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fabd))] pub fn vabdd_f64(a: f64, b: f64) -> f64 { - unsafe { simd_extract!(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } + vget_lane_f64::<0>(vabd_f64(vdup_n_f64(a), vdup_n_f64(b))) } #[doc = "Floating-point absolute difference"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabds_f32)"] @@ -195,7 +207,7 @@ pub fn vabdd_f64(a: f64, b: f64) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fabd))] pub fn vabds_f32(a: f32, b: f32) -> f32 { - unsafe { simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } + vget_lane_f32::<0>(vabd_f32(vdup_n_f32(a), vdup_n_f32(b))) } #[doc = "Floating-point absolute difference"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdh_f16)"] @@ -205,47 +217,47 @@ pub fn vabds_f32(a: f32, b: f32) -> f32 { #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fabd))] pub fn vabdh_f16(a: f16, b: f16) -> f16 { - unsafe { simd_extract!(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) } + vget_lane_f16::<0>(vabd_f16(vdup_n_f16(a), vdup_n_f16(b))) } #[doc = "Signed Absolute difference Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sabdl2))] -pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))] +pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + let c = vget_high_s8(a); + let d = vget_high_s8(b); unsafe { - let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let e: uint16x4_t = simd_cast(vabd_s16(c, d)); + let e: uint8x8_t = simd_cast(vabd_s8(c, d)); simd_cast(e) } } #[doc = "Signed Absolute difference Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sabdl2))] -pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))] +pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + let c = vget_high_s16(a); + let d = vget_high_s16(b); unsafe { - let c: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let d: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let e: uint32x2_t = simd_cast(vabd_s32(c, d)); + let e: uint16x4_t = simd_cast(vabd_s16(c, d)); simd_cast(e) } } #[doc = "Signed Absolute difference Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sabdl2))] -pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))] +pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + let c = vget_high_s32(a); + let d = vget_high_s32(b); unsafe { - let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: uint8x8_t = simd_cast(vabd_s8(c, d)); + let e: uint32x2_t = simd_cast(vabd_s32(c, d)); simd_cast(e) } } @@ -253,40 +265,34 @@ pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uabdl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))] pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - unsafe { - let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - simd_cast(vabd_u8(c, d)) - } + let c = vget_high_u8(a); + let d = vget_high_u8(b); + unsafe { simd_cast(vabd_u8(c, d)) } } #[doc = "Unsigned Absolute difference Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uabdl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))] pub fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - unsafe { - let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - simd_cast(vabd_u16(c, d)) - } + let c = vget_high_u16(a); + let d = vget_high_u16(b); + unsafe { simd_cast(vabd_u16(c, d)) } } #[doc = "Unsigned Absolute difference Long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uabdl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))] pub fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - unsafe { - let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - simd_cast(vabd_u32(c, d)) - } + let c = vget_high_u32(a); + let d = vget_high_u32(b); + unsafe { simd_cast(vabd_u32(c, d)) } } #[doc = "Floating-point absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f64)"] @@ -1023,6 +1029,7 @@ pub fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { #[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1039,8 +1046,33 @@ pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vcadd_rot270_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v4f16" + )] + fn _vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcadd_rot270_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1057,8 +1089,33 @@ pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vcaddq_rot270_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v8f16" + )] + fn _vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcaddq_rot270_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1073,8 +1130,31 @@ pub fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vcadd_rot270_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32" + )] + fn _vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vcadd_rot270_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1089,8 +1169,31 @@ pub fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vcaddq_rot270_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32" + )] + fn _vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcaddq_rot270_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1105,8 +1208,31 @@ pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { _vcaddq_rot270_f64(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64" + )] + fn _vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vcaddq_rot270_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1123,8 +1249,33 @@ pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vcadd_rot90_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v4f16" + )] + fn _vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcadd_rot90_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -1141,8 +1292,33 @@ pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vcaddq_rot90_f16(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v8f16" + )] + fn _vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcaddq_rot90_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1157,8 +1333,31 @@ pub fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vcadd_rot90_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32" + )] + fn _vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vcadd_rot90_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1173,8 +1372,31 @@ pub fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vcaddq_rot90_f32(a, b) } } #[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32" + )] + fn _vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcaddq_rot90_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcadd))] @@ -1188,6 +1410,28 @@ pub fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } unsafe { _vcaddq_rot90_f64(a, b) } } +#[doc = "Floating-point complex add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcadd))] +pub fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64" + )] + fn _vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vcaddq_rot90_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point absolute compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f64)"] #[inline] @@ -1521,7 +1765,7 @@ pub fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vceqd_f64(a: f64, b: f64) -> u64 { - unsafe { simd_extract!(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } + vget_lane_u64::<0>(vceq_f64(vdup_n_f64(a), vdup_n_f64(b))) } #[doc = "Floating-point compare equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqs_f32)"] @@ -1530,7 +1774,7 @@ pub fn vceqd_f64(a: f64, b: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vceqs_f32(a: f32, b: f32) -> u32 { - unsafe { simd_extract!(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } + vget_lane_u32::<0>(vceq_f32(vdup_n_f32(a), vdup_n_f32(b))) } #[doc = "Compare bitwise equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_s64)"] @@ -1558,7 +1802,7 @@ pub fn vceqd_u64(a: u64, b: u64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vceqh_f16(a: f16, b: f16) -> u16 { - unsafe { simd_extract!(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) } + vget_lane_u16::<0>(vceq_f16(vdup_n_f16(a), vdup_n_f16(b))) } #[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f16)"] @@ -1848,7 +2092,7 @@ pub fn vceqzd_u64(a: u64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vceqzh_f16(a: f16) -> u16 { - unsafe { simd_extract!(vceqz_f16(vdup_n_f16(a)), 0) } + vget_lane_u16::<0>(vceqz_f16(vdup_n_f16(a))) } #[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)"] @@ -1857,7 +2101,7 @@ pub fn vceqzh_f16(a: f16) -> u16 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vceqzs_f32(a: f32) -> u32 { - unsafe { simd_extract!(vceqz_f32(vdup_n_f32(a)), 0) } + vget_lane_u32::<0>(vceqz_f32(vdup_n_f32(a))) } #[doc = "Floating-point compare bitwise equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_f64)"] @@ -1866,7 +2110,7 @@ pub fn vceqzs_f32(a: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vceqzd_f64(a: f64) -> u64 { - unsafe { simd_extract!(vceqz_f64(vdup_n_f64(a)), 0) } + vget_lane_u64::<0>(vceqz_f64(vdup_n_f64(a))) } #[doc = "Floating-point compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f64)"] @@ -1929,7 +2173,7 @@ pub fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcged_f64(a: f64, b: f64) -> u64 { - unsafe { simd_extract!(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } + vget_lane_u64::<0>(vcge_f64(vdup_n_f64(a), vdup_n_f64(b))) } #[doc = "Floating-point compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcges_f32)"] @@ -1938,7 +2182,7 @@ pub fn vcged_f64(a: f64, b: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcges_f32(a: f32, b: f32) -> u32 { - unsafe { simd_extract!(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } + vget_lane_u32::<0>(vcge_f32(vdup_n_f32(a), vdup_n_f32(b))) } #[doc = "Compare greater than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_s64)"] @@ -1966,7 +2210,7 @@ pub fn vcged_u64(a: u64, b: u64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcgeh_f16(a: f16, b: f16) -> u16 { - unsafe { simd_extract!(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) } + vget_lane_u16::<0>(vcge_f16(vdup_n_f16(a), vdup_n_f16(b))) } #[doc = "Floating-point compare greater than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32)"] @@ -2095,7 +2339,7 @@ pub fn vcgezq_s64(a: int64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcgezd_f64(a: f64) -> u64 { - unsafe { simd_extract!(vcgez_f64(vdup_n_f64(a)), 0) } + vget_lane_u64::<0>(vcgez_f64(vdup_n_f64(a))) } #[doc = "Floating-point compare greater than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezs_f32)"] @@ -2104,7 +2348,7 @@ pub fn vcgezd_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcgezs_f32(a: f32) -> u32 { - unsafe { simd_extract!(vcgez_f32(vdup_n_f32(a)), 0) } + vget_lane_u32::<0>(vcgez_f32(vdup_n_f32(a))) } #[doc = "Compare signed greater than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_s64)"] @@ -2123,7 +2367,7 @@ pub fn vcgezd_s64(a: i64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcgezh_f16(a: f16) -> u16 { - unsafe { simd_extract!(vcgez_f16(vdup_n_f16(a)), 0) } + vget_lane_u16::<0>(vcgez_f16(vdup_n_f16(a))) } #[doc = "Floating-point compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64)"] @@ -2186,7 +2430,7 @@ pub fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcgtd_f64(a: f64, b: f64) -> u64 { - unsafe { simd_extract!(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } + vget_lane_u64::<0>(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b))) } #[doc = "Floating-point compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgts_f32)"] @@ -2195,7 +2439,7 @@ pub fn vcgtd_f64(a: f64, b: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcgts_f32(a: f32, b: f32) -> u32 { - unsafe { simd_extract!(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } + vget_lane_u32::<0>(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b))) } #[doc = "Compare greater than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_s64)"] @@ -2223,7 +2467,7 @@ pub fn vcgtd_u64(a: u64, b: u64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcgth_f16(a: f16, b: f16) -> u16 { - unsafe { simd_extract!(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) } + vget_lane_u16::<0>(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b))) } #[doc = "Floating-point compare greater than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32)"] @@ -2352,7 +2596,7 @@ pub fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcgtzd_f64(a: f64) -> u64 { - unsafe { simd_extract!(vcgtz_f64(vdup_n_f64(a)), 0) } + vget_lane_u64::<0>(vcgtz_f64(vdup_n_f64(a))) } #[doc = "Floating-point compare greater than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzs_f32)"] @@ -2361,7 +2605,7 @@ pub fn vcgtzd_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcgtzs_f32(a: f32) -> u32 { - unsafe { simd_extract!(vcgtz_f32(vdup_n_f32(a)), 0) } + vget_lane_u32::<0>(vcgtz_f32(vdup_n_f32(a))) } #[doc = "Compare signed greater than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_s64)"] @@ -2380,7 +2624,7 @@ pub fn vcgtzd_s64(a: i64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcgtzh_f16(a: f16) -> u16 { - unsafe { simd_extract!(vcgtz_f16(vdup_n_f16(a)), 0) } + vget_lane_u16::<0>(vcgtz_f16(vdup_n_f16(a))) } #[doc = "Floating-point compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64)"] @@ -2443,7 +2687,7 @@ pub fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcled_f64(a: f64, b: f64) -> u64 { - unsafe { simd_extract!(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } + vget_lane_u64::<0>(vcle_f64(vdup_n_f64(a), vdup_n_f64(b))) } #[doc = "Floating-point compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcles_f32)"] @@ -2452,7 +2696,7 @@ pub fn vcled_f64(a: f64, b: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcles_f32(a: f32, b: f32) -> u32 { - unsafe { simd_extract!(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } + vget_lane_u32::<0>(vcle_f32(vdup_n_f32(a), vdup_n_f32(b))) } #[doc = "Compare less than or equal"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_u64)"] @@ -2480,7 +2724,7 @@ pub fn vcled_s64(a: i64, b: i64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcleh_f16(a: f16, b: f16) -> u16 { - unsafe { simd_extract!(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) } + vget_lane_u16::<0>(vcle_f16(vdup_n_f16(a), vdup_n_f16(b))) } #[doc = "Floating-point compare less than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32)"] @@ -2609,7 +2853,7 @@ pub fn vclezq_s64(a: int64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vclezd_f64(a: f64) -> u64 { - unsafe { simd_extract!(vclez_f64(vdup_n_f64(a)), 0) } + vget_lane_u64::<0>(vclez_f64(vdup_n_f64(a))) } #[doc = "Floating-point compare less than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezs_f32)"] @@ -2618,7 +2862,7 @@ pub fn vclezd_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vclezs_f32(a: f32) -> u32 { - unsafe { simd_extract!(vclez_f32(vdup_n_f32(a)), 0) } + vget_lane_u32::<0>(vclez_f32(vdup_n_f32(a))) } #[doc = "Compare less than or equal to zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_s64)"] @@ -2637,7 +2881,7 @@ pub fn vclezd_s64(a: i64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vclezh_f16(a: f16) -> u16 { - unsafe { simd_extract!(vclez_f16(vdup_n_f16(a)), 0) } + vget_lane_u16::<0>(vclez_f16(vdup_n_f16(a))) } #[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64)"] @@ -2719,7 +2963,7 @@ pub fn vcltd_s64(a: i64, b: i64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vclth_f16(a: f16, b: f16) -> u16 { - unsafe { simd_extract!(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) } + vget_lane_u16::<0>(vclt_f16(vdup_n_f16(a), vdup_n_f16(b))) } #[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32)"] @@ -2728,7 +2972,7 @@ pub fn vclth_f16(a: f16, b: f16) -> u16 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vclts_f32(a: f32, b: f32) -> u32 { - unsafe { simd_extract!(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } + vget_lane_u32::<0>(vclt_f32(vdup_n_f32(a), vdup_n_f32(b))) } #[doc = "Floating-point compare less than"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_f64)"] @@ -2737,7 +2981,7 @@ pub fn vclts_f32(a: f32, b: f32) -> u32 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcltd_f64(a: f64, b: f64) -> u64 { - unsafe { simd_extract!(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } + vget_lane_u64::<0>(vclt_f64(vdup_n_f64(a), vdup_n_f64(b))) } #[doc = "Floating-point compare less than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f32)"] @@ -2866,7 +3110,7 @@ pub fn vcltzq_s64(a: int64x2_t) -> uint64x2_t { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcltzd_f64(a: f64) -> u64 { - unsafe { simd_extract!(vcltz_f64(vdup_n_f64(a)), 0) } + vget_lane_u64::<0>(vcltz_f64(vdup_n_f64(a))) } #[doc = "Floating-point compare less than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzs_f32)"] @@ -2875,7 +3119,7 @@ pub fn vcltzd_f64(a: f64) -> u64 { #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcltzs_f32(a: f32) -> u32 { - unsafe { simd_extract!(vcltz_f32(vdup_n_f32(a)), 0) } + vget_lane_u32::<0>(vcltz_f32(vdup_n_f32(a))) } #[doc = "Compare less than zero"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_s64)"] @@ -2894,11 +3138,12 @@ pub fn vcltzd_s64(a: i64) -> u64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcltzh_f16(a: f16) -> u16 { - unsafe { simd_extract!(vcltz_f16(vdup_n_f16(a)), 0) } + vget_lane_u16::<0>(vcltz_f16(vdup_n_f16(a))) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -2915,14 +3160,40 @@ pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t unsafe { _vcmla_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { +pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f16" + )] + fn _vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), @@ -2933,8 +3204,34 @@ pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t unsafe { _vcmlaq_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16" + )] + fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -2949,8 +3246,32 @@ pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t unsafe { _vcmla_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32" + )] + fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -2965,8 +3286,32 @@ pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t unsafe { _vcmlaq_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32" + )] + fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -2981,6 +3326,29 @@ pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t unsafe { _vcmlaq_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64" + )] + fn _vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -2995,19 +3363,10 @@ pub fn vcmla_lane_f16( c: float16x4_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdup_lane_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f16)"] @@ -3024,23 +3383,10 @@ pub fn vcmlaq_lane_f16( c: float16x4_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdupq_lane_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"] @@ -3055,10 +3401,10 @@ pub fn vcmla_lane_f32( c: float32x2_t, ) -> float32x2_t { static_assert!(LANE == 0); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdup_lane_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f32)"] @@ -3073,19 +3419,10 @@ pub fn vcmlaq_lane_f32( c: float32x2_t, ) -> float32x4_t { static_assert!(LANE == 0); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdupq_lane_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f16)"] @@ -3102,19 +3439,10 @@ pub fn vcmla_laneq_f16( c: float16x8_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdup_laneq_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f16)"] @@ -3131,23 +3459,10 @@ pub fn vcmlaq_laneq_f16( c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdupq_laneq_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"] @@ -3162,10 +3477,10 @@ pub fn vcmla_laneq_f32( c: float32x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdup_laneq_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"] @@ -3180,23 +3495,15 @@ pub fn vcmlaq_laneq_f32( c: float32x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdupq_laneq_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3213,8 +3520,34 @@ pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float unsafe { _vcmla_rot180_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f16" + )] + fn _vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_rot180_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3231,8 +3564,34 @@ pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> floa unsafe { _vcmlaq_rot180_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v8f16" + )] + fn _vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_rot180_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3247,8 +3606,32 @@ pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float unsafe { _vcmla_rot180_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32" + )] + fn _vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_rot180_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3263,8 +3646,32 @@ pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> floa unsafe { _vcmlaq_rot180_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32" + )] + fn _vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_rot180_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3279,6 +3686,29 @@ pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa unsafe { _vcmlaq_rot180_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64" + )] + fn _vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_rot180_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3293,19 +3723,10 @@ pub fn vcmla_rot180_lane_f16( c: float16x4_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot180_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdup_lane_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_rot180_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f16)"] @@ -3322,23 +3743,10 @@ pub fn vcmlaq_rot180_lane_f16( c: float16x4_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot180_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdupq_lane_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_rot180_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"] @@ -3353,10 +3761,10 @@ pub fn vcmla_rot180_lane_f32( c: float32x2_t, ) -> float32x2_t { static_assert!(LANE == 0); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot180_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdup_lane_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f32)"] @@ -3371,19 +3779,10 @@ pub fn vcmlaq_rot180_lane_f32( c: float32x2_t, ) -> float32x4_t { static_assert!(LANE == 0); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot180_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdupq_lane_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f16)"] @@ -3400,19 +3799,10 @@ pub fn vcmla_rot180_laneq_f16( c: float16x8_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot180_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdup_laneq_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_rot180_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f16)"] @@ -3429,23 +3819,10 @@ pub fn vcmlaq_rot180_laneq_f16( c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot180_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdupq_laneq_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_rot180_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"] @@ -3460,10 +3837,10 @@ pub fn vcmla_rot180_laneq_f32( c: float32x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot180_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdup_laneq_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f32)"] @@ -3478,23 +3855,15 @@ pub fn vcmlaq_rot180_laneq_f32( c: float32x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot180_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdupq_laneq_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_rot180_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3511,8 +3880,34 @@ pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float unsafe { _vcmla_rot270_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v4f16" + )] + fn _vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_rot270_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3529,8 +3924,34 @@ pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> floa unsafe { _vcmlaq_rot270_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16" + )] + fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_rot270_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3545,8 +3966,32 @@ pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float unsafe { _vcmla_rot270_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32" + )] + fn _vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_rot270_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3561,8 +4006,32 @@ pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> floa unsafe { _vcmlaq_rot270_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32" + )] + fn _vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_rot270_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3577,6 +4046,29 @@ pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa unsafe { _vcmlaq_rot270_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64" + )] + fn _vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_rot270_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] @@ -3591,19 +4083,10 @@ pub fn vcmla_rot270_lane_f16( c: float16x4_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot270_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdup_lane_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_rot270_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f16)"] @@ -3620,23 +4103,10 @@ pub fn vcmlaq_rot270_lane_f16( c: float16x4_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot270_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdupq_lane_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_rot270_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"] @@ -3651,10 +4121,10 @@ pub fn vcmla_rot270_lane_f32( c: float32x2_t, ) -> float32x2_t { static_assert!(LANE == 0); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot270_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdup_lane_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f32)"] @@ -3669,19 +4139,10 @@ pub fn vcmlaq_rot270_lane_f32( c: float32x2_t, ) -> float32x4_t { static_assert!(LANE == 0); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot270_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdupq_lane_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f16)"] @@ -3698,19 +4159,10 @@ pub fn vcmla_rot270_laneq_f16( c: float16x8_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot270_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdup_laneq_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_rot270_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f16)"] @@ -3727,23 +4179,10 @@ pub fn vcmlaq_rot270_laneq_f16( c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot270_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdupq_laneq_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_rot270_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"] @@ -3758,10 +4197,10 @@ pub fn vcmla_rot270_laneq_f32( c: float32x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot270_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdup_laneq_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f32)"] @@ -3776,23 +4215,15 @@ pub fn vcmlaq_rot270_laneq_f32( c: float32x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot270_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdupq_laneq_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_rot270_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3809,8 +4240,34 @@ pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float1 unsafe { _vcmla_rot90_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v4f16" + )] + fn _vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vcmla_rot90_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] @@ -3827,8 +4284,34 @@ pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float unsafe { _vcmlaq_rot90_f16(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v8f16" + )] + fn _vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vcmlaq_rot90_f16(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3843,8 +4326,32 @@ pub fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3 unsafe { _vcmla_rot90_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32" + )] + fn _vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float32x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float32x2_t = _vcmla_rot90_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] @@ -3859,23 +4366,70 @@ pub fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float unsafe { _vcmlaq_rot90_f32(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fcma")] #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] #[cfg_attr(test, assert_instr(fcmla))] -pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { +pub fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64" + link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32" )] - fn _vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + fn _vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vcmlaq_rot90_f32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vcmlaq_rot90_f64(a, b, c) } } #[doc = "Floating-point complex multiply accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64" + )] + fn _vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { _vcmlaq_rot90_f64(a, b, c) } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64" + )] + fn _vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: float64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: float64x2_t = _vcmlaq_rot90_f64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f16)"] #[inline] #[target_feature(enable = "neon,fcma")] #[cfg_attr(test, assert_instr(fcmla, LANE = 0))] @@ -3889,19 +4443,10 @@ pub fn vcmla_rot90_lane_f16( c: float16x4_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot90_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdup_lane_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_rot90_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f16)"] @@ -3918,23 +4463,10 @@ pub fn vcmlaq_rot90_lane_f16( c: float16x4_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot90_f16(a, b, c) - } + let c = vreinterpret_u32_f16(c); + let c = vdupq_lane_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_rot90_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"] @@ -3949,10 +4481,10 @@ pub fn vcmla_rot90_lane_f32( c: float32x2_t, ) -> float32x2_t { static_assert!(LANE == 0); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot90_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdup_lane_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_rot90_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f32)"] @@ -3967,19 +4499,10 @@ pub fn vcmlaq_rot90_lane_f32( c: float32x2_t, ) -> float32x4_t { static_assert!(LANE == 0); - unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot90_f32(a, b, c) - } + let c = vreinterpret_u64_f32(c); + let c = vdupq_lane_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_rot90_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"] @@ -3996,19 +4519,10 @@ pub fn vcmla_rot90_laneq_f16( c: float16x8_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmla_rot90_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdup_laneq_u32::(c); + let c = vreinterpret_f16_u32(c); + vcmla_rot90_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"] @@ -4025,23 +4539,10 @@ pub fn vcmlaq_rot90_laneq_f16( c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: float16x8_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot90_f16(a, b, c) - } + let c = vreinterpretq_u32_f16(c); + let c = vdupq_laneq_u32::(c); + let c = vreinterpretq_f16_u32(c); + vcmlaq_rot90_f16(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"] @@ -4056,10 +4557,10 @@ pub fn vcmla_rot90_laneq_f32( c: float32x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot90_f32(a, b, c) - } + let c = vreinterpretq_u64_f32(c); + let c = vdup_laneq_u64::(c); + let c = vreinterpret_f32_u64(c); + vcmla_rot90_f32(a, b, c) } #[doc = "Floating-point complex multiply accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f32)"] @@ -4074,25 +4575,42 @@ pub fn vcmlaq_rot90_laneq_f32( c: float32x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); + let c = vreinterpretq_u64_f32(c); + let c = vdupq_laneq_u64::(c); + let c = vreinterpretq_f32_u64(c); + vcmlaq_rot90_f32(a, b, c) +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(mov))] +pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } +} +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(mov))] +pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t { unsafe { - let c: float32x4_t = simd_shuffle!( - c, - c, - [ - 2 * LANE as u32, - 2 * LANE as u32 + 1, - 2 * LANE as u32, - 2 * LANE as u32 + 1 - ] - ); - vcmlaq_rot90_f32(a, b, c) + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_f32( @@ -4101,105 +4619,76 @@ pub fn vcopy_lane_f32( ) -> float32x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_f32::(vget_lane_f32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 3); - unsafe { - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_s8::(vget_lane_s8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - unsafe { - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_s16::(vget_lane_s16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_s32::(vget_lane_s32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 3); - unsafe { - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_u8::(vget_lane_u8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_u16( @@ -4208,21 +4697,16 @@ pub fn vcopy_lane_u16( ) -> uint16x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - unsafe { - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_u16::(vget_lane_u16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_u32( @@ -4231,43 +4715,31 @@ pub fn vcopy_lane_u32( ) -> uint32x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_u32::(vget_lane_u32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 3); - unsafe { - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_p8::(vget_lane_p8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_lane_p16( @@ -4276,21 +4748,76 @@ pub fn vcopy_lane_p16( ) -> poly16x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - unsafe { - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_p16::(vget_lane_p16::(b), a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_lane_f64( + _a: float64x1_t, + b: float64x1_t, +) -> float64x1_t { + static_assert!(LANE1 == 0); + static_assert!(LANE2 == 0); + b +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_lane_s64( + _a: int64x1_t, + b: int64x1_t, +) -> int64x1_t { + static_assert!(LANE1 == 0); + static_assert!(LANE2 == 0); + b +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_lane_u64( + _a: uint64x1_t, + b: uint64x1_t, +) -> uint64x1_t { + static_assert!(LANE1 == 0); + static_assert!(LANE2 == 0); + b +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_lane_p64( + _a: poly64x1_t, + b: poly64x1_t, +) -> poly64x1_t { + static_assert!(LANE1 == 0); + static_assert!(LANE2 == 0); + b } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_f32( @@ -4299,46 +4826,31 @@ pub fn vcopy_laneq_f32( ) -> float32x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 2); - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_f32::(vgetq_lane_f32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_s8(a: int8x8_t, b: int8x16_t) -> int8x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 4); - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_s8::(vgetq_lane_s8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_s16( @@ -4347,22 +4859,16 @@ pub fn vcopy_laneq_s16( ) -> int16x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 3); - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_s16::(vgetq_lane_s16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_s32( @@ -4371,20 +4877,16 @@ pub fn vcopy_laneq_s32( ) -> int32x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 2); - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_s32::(vgetq_lane_s32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_u8( @@ -4393,27 +4895,16 @@ pub fn vcopy_laneq_u8( ) -> uint8x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 4); - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_u8::(vgetq_lane_u8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_u16( @@ -4422,22 +4913,16 @@ pub fn vcopy_laneq_u16( ) -> uint16x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 3); - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_u16::(vgetq_lane_u16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_u32( @@ -4446,20 +4931,16 @@ pub fn vcopy_laneq_u32( ) -> uint32x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 2); - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_u32::(vgetq_lane_u32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_p8( @@ -4468,27 +4949,16 @@ pub fn vcopy_laneq_p8( ) -> poly8x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 4); - unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_p8::(vgetq_lane_p8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopy_laneq_p16( @@ -4497,16 +4967,67 @@ pub fn vcopy_laneq_p16( ) -> poly16x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 3); - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vset_lane_p16::(vgetq_lane_p16::(b), a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_laneq_f64( + _a: float64x1_t, + b: float64x2_t, +) -> float64x1_t { + static_assert!(LANE1 == 0); + static_assert_uimm_bits!(LANE2, 1); + unsafe { transmute(vgetq_lane_f64::(b)) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_laneq_s64( + _a: int64x1_t, + b: int64x2_t, +) -> int64x1_t { + static_assert!(LANE1 == 0); + static_assert_uimm_bits!(LANE2, 1); + unsafe { transmute(vgetq_lane_s64::(b)) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_laneq_u64( + _a: uint64x1_t, + b: uint64x2_t, +) -> uint64x1_t { + static_assert!(LANE1 == 0); + static_assert_uimm_bits!(LANE2, 1); + unsafe { transmute(vgetq_lane_u64::(b)) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopy_laneq_p64( + _a: poly64x1_t, + b: poly64x2_t, +) -> poly64x1_t { + static_assert!(LANE1 == 0); + static_assert_uimm_bits!(LANE2, 1); + unsafe { transmute(vgetq_lane_p64::(b)) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"] @@ -4521,22 +5042,16 @@ pub fn vcopyq_lane_f32( ) -> float32x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 1); - unsafe { - let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_f32::(vget_lane_f32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 1, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_f64( @@ -4545,20 +5060,17 @@ pub fn vcopyq_lane_f64( ) -> float64x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert!(LANE2 == 0); - unsafe { - let b: float64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: float64x2_t = vcombine_f64(b, b); + vsetq_lane_f64::(vgetq_lane_f64::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 1, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_s64( @@ -4567,20 +5079,17 @@ pub fn vcopyq_lane_s64( ) -> int64x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert!(LANE2 == 0); - unsafe { - let b: int64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: int64x2_t = vcombine_s64(b, b); + vsetq_lane_s64::(vgetq_lane_s64::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 1, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_u64( @@ -4589,20 +5098,38 @@ pub fn vcopyq_lane_u64( ) -> uint64x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert!(LANE2 == 0); - unsafe { - let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: uint64x2_t = vcombine_u64(b, b); + vsetq_lane_u64::(vgetq_lane_u64::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 1, LANE2 = 0) +)] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopyq_lane_p64( + a: poly64x2_t, + b: poly64x1_t, +) -> poly64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert!(LANE2 == 0); + let b: poly64x2_t = vcombine_p64(b, b); + unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 1, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_p64( @@ -4612,389 +5139,37 @@ pub fn vcopyq_lane_p64( static_assert_uimm_bits!(LANE1, 1); static_assert!(LANE2 == 0); unsafe { - let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = vcombine_p64(b, b); + let ret_val: poly64x2_t = + simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_s8(a: int8x16_t, b: int8x8_t) -> int8x16_t { static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 3); - unsafe { - let b: int8x16_t = - simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b1111 { - 0 => simd_shuffle!( - a, - b, - [ - 16 + LANE2 as u32, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 1 => simd_shuffle!( - a, - b, - [ - 0, - 16 + LANE2 as u32, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 2 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 16 + LANE2 as u32, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 3 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 16 + LANE2 as u32, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 4 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 16 + LANE2 as u32, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 5 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 16 + LANE2 as u32, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 6 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 16 + LANE2 as u32, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 7 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 16 + LANE2 as u32, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 8 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 16 + LANE2 as u32, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 9 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 16 + LANE2 as u32, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 10 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 16 + LANE2 as u32, - 11, - 12, - 13, - 14, - 15 - ] - ), - 11 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 16 + LANE2 as u32, - 12, - 13, - 14, - 15 - ] - ), - 12 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 16 + LANE2 as u32, - 13, - 14, - 15 - ] - ), - 13 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 16 + LANE2 as u32, - 14, - 15 - ] - ), - 14 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 16 + LANE2 as u32, - 15 - ] - ), - 15 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 16 + LANE2 as u32 - ] - ), - _ => unreachable_unchecked(), - } - } + let b: int8x16_t = vcombine_s8(b, b); + vsetq_lane_s8::(vgetq_lane_s8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_s16( @@ -5003,26 +5178,17 @@ pub fn vcopyq_lane_s16( ) -> int16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 2); - unsafe { - let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: int16x8_t = vcombine_s16(b, b); + vsetq_lane_s16::(vgetq_lane_s16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_s32( @@ -5031,22 +5197,17 @@ pub fn vcopyq_lane_s32( ) -> int32x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 1); - unsafe { - let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: int32x4_t = vcombine_s32(b, b); + vsetq_lane_s32::(vgetq_lane_s32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_u8( @@ -5055,371 +5216,17 @@ pub fn vcopyq_lane_u8( ) -> uint8x16_t { static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 3); - unsafe { - let b: uint8x16_t = - simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b1111 { - 0 => simd_shuffle!( - a, - b, - [ - 16 + LANE2 as u32, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 1 => simd_shuffle!( - a, - b, - [ - 0, - 16 + LANE2 as u32, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 2 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 16 + LANE2 as u32, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 3 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 16 + LANE2 as u32, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 4 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 16 + LANE2 as u32, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 5 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 16 + LANE2 as u32, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 6 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 16 + LANE2 as u32, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 7 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 16 + LANE2 as u32, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 8 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 16 + LANE2 as u32, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 9 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 16 + LANE2 as u32, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 10 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 16 + LANE2 as u32, - 11, - 12, - 13, - 14, - 15 - ] - ), - 11 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 16 + LANE2 as u32, - 12, - 13, - 14, - 15 - ] - ), - 12 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 16 + LANE2 as u32, - 13, - 14, - 15 - ] - ), - 13 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 16 + LANE2 as u32, - 14, - 15 - ] - ), - 14 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 16 + LANE2 as u32, - 15 - ] - ), - 15 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 16 + LANE2 as u32 - ] - ), - _ => unreachable_unchecked(), - } - } + let b: uint8x16_t = vcombine_u8(b, b); + vsetq_lane_u8::(vgetq_lane_u8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_u16( @@ -5428,26 +5235,17 @@ pub fn vcopyq_lane_u16( ) -> uint16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 2); - unsafe { - let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: uint16x8_t = vcombine_u16(b, b); + vsetq_lane_u16::(vgetq_lane_u16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_u32( @@ -5456,22 +5254,17 @@ pub fn vcopyq_lane_u32( ) -> uint32x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 1); - unsafe { - let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: uint32x4_t = vcombine_u32(b, b); + vsetq_lane_u32::(vgetq_lane_u32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_p8( @@ -5480,371 +5273,17 @@ pub fn vcopyq_lane_p8( ) -> poly8x16_t { static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 3); - unsafe { - let b: poly8x16_t = - simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b1111 { - 0 => simd_shuffle!( - a, - b, - [ - 16 + LANE2 as u32, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 1 => simd_shuffle!( - a, - b, - [ - 0, - 16 + LANE2 as u32, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 2 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 16 + LANE2 as u32, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 3 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 16 + LANE2 as u32, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 4 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 16 + LANE2 as u32, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 5 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 16 + LANE2 as u32, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 6 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 16 + LANE2 as u32, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 7 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 16 + LANE2 as u32, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 8 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 16 + LANE2 as u32, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 9 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 16 + LANE2 as u32, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 10 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 16 + LANE2 as u32, - 11, - 12, - 13, - 14, - 15 - ] - ), - 11 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 16 + LANE2 as u32, - 12, - 13, - 14, - 15 - ] - ), - 12 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 16 + LANE2 as u32, - 13, - 14, - 15 - ] - ), - 13 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 16 + LANE2 as u32, - 14, - 15 - ] - ), - 14 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 16 + LANE2 as u32, - 15 - ] - ), - 15 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 16 + LANE2 as u32 - ] - ), - _ => unreachable_unchecked(), - } - } + let b: poly8x16_t = vcombine_p8(b, b); + vsetq_lane_p8::(vgetq_lane_p8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_lane_p16( @@ -5853,26 +5292,17 @@ pub fn vcopyq_lane_p16( ) -> poly16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 2); - unsafe { - let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + let b: poly16x8_t = vcombine_p16(b, b); + vsetq_lane_p16::(vgetq_lane_p16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_f32( @@ -5881,21 +5311,16 @@ pub fn vcopyq_laneq_f32( ) -> float32x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - unsafe { - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_f32::(vgetq_lane_f32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_f64( @@ -5904,19 +5329,16 @@ pub fn vcopyq_laneq_f64( ) -> float64x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_f64::(vgetq_lane_f64::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_s8( @@ -5925,369 +5347,16 @@ pub fn vcopyq_laneq_s8( ) -> int8x16_t { static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 4); - unsafe { - match LANE1 & 0b1111 { - 0 => simd_shuffle!( - a, - b, - [ - 16 + LANE2 as u32, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 1 => simd_shuffle!( - a, - b, - [ - 0, - 16 + LANE2 as u32, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 2 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 16 + LANE2 as u32, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 3 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 16 + LANE2 as u32, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 4 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 16 + LANE2 as u32, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 5 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 16 + LANE2 as u32, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 6 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 16 + LANE2 as u32, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 7 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 16 + LANE2 as u32, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 8 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 16 + LANE2 as u32, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 9 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 16 + LANE2 as u32, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 10 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 16 + LANE2 as u32, - 11, - 12, - 13, - 14, - 15 - ] - ), - 11 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 16 + LANE2 as u32, - 12, - 13, - 14, - 15 - ] - ), - 12 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 16 + LANE2 as u32, - 13, - 14, - 15 - ] - ), - 13 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 16 + LANE2 as u32, - 14, - 15 - ] - ), - 14 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 16 + LANE2 as u32, - 15 - ] - ), - 15 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 16 + LANE2 as u32 - ] - ), - _ => unreachable_unchecked(), - } - } + vsetq_lane_s8::(vgetq_lane_s8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_s16( @@ -6296,25 +5365,16 @@ pub fn vcopyq_laneq_s16( ) -> int16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 3); - unsafe { - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_s16::(vgetq_lane_s16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_s32( @@ -6323,21 +5383,16 @@ pub fn vcopyq_laneq_s32( ) -> int32x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - unsafe { - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_s32::(vgetq_lane_s32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_s64( @@ -6346,19 +5401,16 @@ pub fn vcopyq_laneq_s64( ) -> int64x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_s64::(vgetq_lane_s64::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_u8( @@ -6367,369 +5419,16 @@ pub fn vcopyq_laneq_u8( ) -> uint8x16_t { static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 4); - unsafe { - match LANE1 & 0b1111 { - 0 => simd_shuffle!( - a, - b, - [ - 16 + LANE2 as u32, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 1 => simd_shuffle!( - a, - b, - [ - 0, - 16 + LANE2 as u32, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 2 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 16 + LANE2 as u32, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 3 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 16 + LANE2 as u32, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 4 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 16 + LANE2 as u32, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 5 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 16 + LANE2 as u32, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 6 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 16 + LANE2 as u32, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 7 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 16 + LANE2 as u32, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 8 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 16 + LANE2 as u32, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 9 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 16 + LANE2 as u32, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 10 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 16 + LANE2 as u32, - 11, - 12, - 13, - 14, - 15 - ] - ), - 11 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 16 + LANE2 as u32, - 12, - 13, - 14, - 15 - ] - ), - 12 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 16 + LANE2 as u32, - 13, - 14, - 15 - ] - ), - 13 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 16 + LANE2 as u32, - 14, - 15 - ] - ), - 14 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 16 + LANE2 as u32, - 15 - ] - ), - 15 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 16 + LANE2 as u32 - ] - ), - _ => unreachable_unchecked(), - } - } + vsetq_lane_u8::(vgetq_lane_u8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_u16( @@ -6738,25 +5437,16 @@ pub fn vcopyq_laneq_u16( ) -> uint16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 3); - unsafe { - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_u16::(vgetq_lane_u16::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_u32( @@ -6765,21 +5455,16 @@ pub fn vcopyq_laneq_u32( ) -> uint32x4_t { static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - unsafe { - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_u32::(vgetq_lane_u32::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_u64( @@ -6788,19 +5473,16 @@ pub fn vcopyq_laneq_u64( ) -> uint64x2_t { static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_u64::(vgetq_lane_u64::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_p8( @@ -6809,369 +5491,16 @@ pub fn vcopyq_laneq_p8( ) -> poly8x16_t { static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 4); - unsafe { - match LANE1 & 0b1111 { - 0 => simd_shuffle!( - a, - b, - [ - 16 + LANE2 as u32, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 1 => simd_shuffle!( - a, - b, - [ - 0, - 16 + LANE2 as u32, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 2 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 16 + LANE2 as u32, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 3 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 16 + LANE2 as u32, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 4 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 16 + LANE2 as u32, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 5 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 16 + LANE2 as u32, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 6 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 16 + LANE2 as u32, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 7 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 16 + LANE2 as u32, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 8 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 16 + LANE2 as u32, - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 9 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 16 + LANE2 as u32, - 10, - 11, - 12, - 13, - 14, - 15 - ] - ), - 10 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 16 + LANE2 as u32, - 11, - 12, - 13, - 14, - 15 - ] - ), - 11 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 16 + LANE2 as u32, - 12, - 13, - 14, - 15 - ] - ), - 12 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 16 + LANE2 as u32, - 13, - 14, - 15 - ] - ), - 13 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 16 + LANE2 as u32, - 14, - 15 - ] - ), - 14 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 16 + LANE2 as u32, - 15 - ] - ), - 15 => simd_shuffle!( - a, - b, - [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 16 + LANE2 as u32 - ] - ), - _ => unreachable_unchecked(), - } - } + vsetq_lane_p8::(vgetq_lane_p8::(b), a) } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_p16( @@ -7180,25 +5509,36 @@ pub fn vcopyq_laneq_p16( ) -> poly16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 3); - unsafe { - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } - } + vsetq_lane_p16::(vgetq_lane_p16::(b), a) +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vcopyq_laneq_p64( + a: poly64x2_t, + b: poly64x2_t, +) -> poly64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 1); + unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))] +#[cfg_attr( + all(test, target_endian = "little"), + assert_instr(mov, LANE1 = 0, LANE2 = 0) +)] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcopyq_laneq_p64( @@ -7208,11 +5548,11 @@ pub fn vcopyq_laneq_p64( static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); unsafe { - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = + simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Insert vector element from another vector element"] @@ -7228,7 +5568,7 @@ pub fn vcreate_f64(a: u64) -> float64x1_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtn))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t { unsafe { simd_cast(a) } @@ -7282,7 +5622,7 @@ pub fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f16_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t { @@ -7292,7 +5632,7 @@ pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t { @@ -7302,22 +5642,19 @@ pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { - unsafe { simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3]) } + vcombine_f32(a, vcvt_f32_f64(b)) } #[doc = "Floating-point convert to higher precision long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t { - unsafe { - let b: float32x2_t = simd_shuffle!(a, a, [2, 3]); - simd_cast(b) - } + unsafe { simd_cast(vget_high_f32(a)) } } #[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"] @@ -7731,7 +6068,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtah_s16_f16(a: f16) -> i16 { - vcvtah_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i16.f16" + )] + fn _vcvtah_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtah_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"] @@ -7775,7 +6119,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtah_u16_f16(a: f16) -> u16 { - vcvtah_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i16.f16" + )] + fn _vcvtah_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtah_u16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"] @@ -8417,7 +6768,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtmh_s16_f16(a: f16) -> i16 { - vcvtmh_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.i16.f16" + )] + fn _vcvtmh_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtmh_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"] @@ -8453,7 +6811,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 { } unsafe { _vcvtmh_s64_f16(a) } } -#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"] #[inline] #[cfg_attr(test, assert_instr(fcvtmu))] @@ -8461,7 +6819,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtmh_u16_f16(a: f16) -> u16 { - vcvtmh_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i16.f16" + )] + fn _vcvtmh_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtmh_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"] @@ -8765,7 +7130,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtnh_s16_f16(a: f16) -> i16 { - vcvtnh_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i16.f16" + )] + fn _vcvtnh_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtnh_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"] @@ -8809,7 +7181,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtnh_u16_f16(a: f16) -> u16 { - vcvtnh_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i16.f16" + )] + fn _vcvtnh_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtnh_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"] @@ -9113,7 +7492,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtph_s16_f16(a: f16) -> i16 { - vcvtph_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i16.f16" + )] + fn _vcvtph_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtph_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"] @@ -9157,7 +7543,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtph_u16_f16(a: f16) -> u16 { - vcvtph_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i16.f16" + )] + fn _vcvtph_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtph_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"] @@ -9459,7 +7852,7 @@ pub fn vcvtd_u64_f64(a: f64) -> u64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtxn))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { unsafe extern "unadjusted" { @@ -9475,10 +7868,10 @@ pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtxn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { - unsafe { simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) } + vcombine_f32(a, vcvtx_f32_f64(b)) } #[doc = "Floating-point convert to lower precision narrow, rounding to odd"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64)"] @@ -9487,7 +7880,7 @@ pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { #[cfg_attr(test, assert_instr(fcvtxn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vcvtxd_f32_f64(a: f64) -> f32 { - unsafe { simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) } + vget_lane_f32::<0>(vcvtx_f32_f64(vdupq_n_f64(a))) } #[doc = "Divide"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f16)"] @@ -9586,7 +7979,7 @@ pub fn vdup_lane_p64(a: poly64x1_t) -> poly64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdup_laneq_f64(a: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(N, 1); - unsafe { transmute::(simd_extract!(a, N as u32)) } + unsafe { transmute(vgetq_lane_f64::(a)) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64)"] @@ -9597,7 +7990,7 @@ pub fn vdup_laneq_f64(a: float64x2_t) -> float64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdup_laneq_p64(a: poly64x2_t) -> poly64x1_t { static_assert_uimm_bits!(N, 1); - unsafe { transmute::(simd_extract!(a, N as u32)) } + unsafe { transmute(vgetq_lane_p64::(a)) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8)"] @@ -9608,7 +8001,7 @@ pub fn vdup_laneq_p64(a: poly64x2_t) -> poly64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupb_lane_s8(a: int8x8_t) -> i8 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(a, N as u32) } + vget_lane_s8::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16)"] @@ -9619,7 +8012,7 @@ pub fn vdupb_lane_s8(a: int8x8_t) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vduph_laneq_s16(a: int16x8_t) -> i16 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_s16::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8)"] @@ -9630,7 +8023,7 @@ pub fn vduph_laneq_s16(a: int16x8_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupb_lane_u8(a: uint8x8_t) -> u8 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(a, N as u32) } + vget_lane_u8::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16)"] @@ -9641,7 +8034,7 @@ pub fn vdupb_lane_u8(a: uint8x8_t) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vduph_laneq_u16(a: uint16x8_t) -> u16 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_u16::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8)"] @@ -9652,7 +8045,7 @@ pub fn vduph_laneq_u16(a: uint16x8_t) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupb_lane_p8(a: poly8x8_t) -> p8 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(a, N as u32) } + vget_lane_p8::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16)"] @@ -9663,7 +8056,7 @@ pub fn vdupb_lane_p8(a: poly8x8_t) -> p8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vduph_laneq_p16(a: poly16x8_t) -> p16 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_p16::(a) } #[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"] @@ -9674,7 +8067,7 @@ pub fn vduph_laneq_p16(a: poly16x8_t) -> p16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupb_laneq_s8(a: int8x16_t) -> i8 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_s8::(a) } #[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"] @@ -9685,7 +8078,7 @@ pub fn vdupb_laneq_s8(a: int8x16_t) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_u8::(a) } #[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"] @@ -9696,7 +8089,7 @@ pub fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupb_laneq_p8(a: poly8x16_t) -> p8 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_p8::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64)"] @@ -9707,7 +8100,7 @@ pub fn vdupb_laneq_p8(a: poly8x16_t) -> p8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupd_lane_f64(a: float64x1_t) -> f64 { static_assert!(N == 0); - unsafe { simd_extract!(a, N as u32) } + vget_lane_f64::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64)"] @@ -9718,7 +8111,7 @@ pub fn vdupd_lane_f64(a: float64x1_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupd_lane_s64(a: int64x1_t) -> i64 { static_assert!(N == 0); - unsafe { simd_extract!(a, N as u32) } + vget_lane_s64::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64)"] @@ -9729,7 +8122,7 @@ pub fn vdupd_lane_s64(a: int64x1_t) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupd_lane_u64(a: uint64x1_t) -> u64 { static_assert!(N == 0); - unsafe { simd_extract!(a, N as u32) } + vget_lane_u64::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_f16)"] @@ -9741,7 +8134,7 @@ pub fn vdupd_lane_u64(a: uint64x1_t) -> u64 { #[cfg(not(target_arch = "arm64ec"))] pub fn vduph_lane_f16(a: float16x4_t) -> f16 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vget_lane_f16::(a) } #[doc = "Extract an element from a vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_f16)"] @@ -9753,11 +8146,12 @@ pub fn vduph_lane_f16(a: float16x4_t) -> f16 { #[cfg(not(target_arch = "arm64ec"))] pub fn vduph_laneq_f16(a: float16x8_t) -> f16 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_f16::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 0))] #[rustc_legacy_const_generics(1)] @@ -9767,8 +8161,24 @@ pub fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t { + static_assert!(N == 0); + unsafe { + let ret_val: float64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 0))] #[rustc_legacy_const_generics(1)] @@ -9778,8 +8188,24 @@ pub fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t { + static_assert!(N == 0); + unsafe { + let ret_val: poly64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 1))] #[rustc_legacy_const_generics(1)] @@ -9789,8 +8215,25 @@ pub fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(dup, N = 1))] #[rustc_legacy_const_generics(1)] @@ -9800,6 +8243,22 @@ pub fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"] #[inline] #[target_feature(enable = "neon")] @@ -9808,7 +8267,7 @@ pub fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdups_lane_f32(a: float32x2_t) -> f32 { static_assert_uimm_bits!(N, 1); - unsafe { simd_extract!(a, N as u32) } + vget_lane_f32::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"] @@ -9819,7 +8278,7 @@ pub fn vdups_lane_f32(a: float32x2_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupd_laneq_f64(a: float64x2_t) -> f64 { static_assert_uimm_bits!(N, 1); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_f64::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32)"] @@ -9830,7 +8289,7 @@ pub fn vdupd_laneq_f64(a: float64x2_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdups_lane_s32(a: int32x2_t) -> i32 { static_assert_uimm_bits!(N, 1); - unsafe { simd_extract!(a, N as u32) } + vget_lane_s32::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64)"] @@ -9841,7 +8300,7 @@ pub fn vdups_lane_s32(a: int32x2_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupd_laneq_s64(a: int64x2_t) -> i64 { static_assert_uimm_bits!(N, 1); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_s64::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32)"] @@ -9852,7 +8311,7 @@ pub fn vdupd_laneq_s64(a: int64x2_t) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdups_lane_u32(a: uint32x2_t) -> u32 { static_assert_uimm_bits!(N, 1); - unsafe { simd_extract!(a, N as u32) } + vget_lane_u32::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64)"] @@ -9863,7 +8322,7 @@ pub fn vdups_lane_u32(a: uint32x2_t) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdupd_laneq_u64(a: uint64x2_t) -> u64 { static_assert_uimm_bits!(N, 1); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_u64::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32)"] @@ -9874,7 +8333,7 @@ pub fn vdupd_laneq_u64(a: uint64x2_t) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdups_laneq_f32(a: float32x4_t) -> f32 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_f32::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16)"] @@ -9885,7 +8344,7 @@ pub fn vdups_laneq_f32(a: float32x4_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vduph_lane_s16(a: int16x4_t) -> i16 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vget_lane_s16::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32)"] @@ -9896,7 +8355,7 @@ pub fn vduph_lane_s16(a: int16x4_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdups_laneq_s32(a: int32x4_t) -> i32 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_s32::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16)"] @@ -9907,7 +8366,7 @@ pub fn vdups_laneq_s32(a: int32x4_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vduph_lane_u16(a: uint16x4_t) -> u16 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vget_lane_u16::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32)"] @@ -9918,7 +8377,7 @@ pub fn vduph_lane_u16(a: uint16x4_t) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vdups_laneq_u32(a: uint32x4_t) -> u32 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vgetq_lane_u32::(a) } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16)"] @@ -9929,7 +8388,7 @@ pub fn vdups_laneq_u32(a: uint32x4_t) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vduph_lane_p16(a: poly16x4_t) -> p16 { static_assert_uimm_bits!(N, 2); - unsafe { simd_extract!(a, N as u32) } + vget_lane_p16::(a) } #[doc = "Three-way exclusive OR"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)"] @@ -10062,6 +8521,19 @@ pub fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { #[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"] +#[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ext, N = 1))] #[rustc_legacy_const_generics(2)] @@ -10069,16 +8541,28 @@ pub fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { pub fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(N, 1); unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Extract vector from pair of vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } +} +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"] +#[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ext, N = 1))] #[rustc_legacy_const_generics(2)] @@ -10086,11 +8570,10 @@ pub fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { pub fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { static_assert_uimm_bits!(N, 1); unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] @@ -10116,7 +8599,7 @@ pub fn vfma_lane_f16( c: float16x4_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) } + vfma_f16(a, b, vdup_n_f16(vget_lane_f16::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f16)"] @@ -10132,7 +8615,7 @@ pub fn vfma_laneq_f16( c: float16x8_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) } + vfma_f16(a, b, vdup_n_f16(vgetq_lane_f16::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f16)"] @@ -10148,7 +8631,7 @@ pub fn vfmaq_lane_f16( c: float16x4_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) } + vfmaq_f16(a, b, vdupq_n_f16(vget_lane_f16::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f16)"] @@ -10164,7 +8647,7 @@ pub fn vfmaq_laneq_f16( c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) } + vfmaq_f16(a, b, vdupq_n_f16(vgetq_lane_f16::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"] @@ -10179,7 +8662,7 @@ pub fn vfma_lane_f32( c: float32x2_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } + vfma_f32(a, b, vdup_n_f32(vget_lane_f32::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32)"] @@ -10194,7 +8677,7 @@ pub fn vfma_laneq_f32( c: float32x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } + vfma_f32(a, b, vdup_n_f32(vgetq_lane_f32::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32)"] @@ -10209,7 +8692,7 @@ pub fn vfmaq_lane_f32( c: float32x2_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } + vfmaq_f32(a, b, vdupq_n_f32(vget_lane_f32::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32)"] @@ -10224,7 +8707,7 @@ pub fn vfmaq_laneq_f32( c: float32x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } + vfmaq_f32(a, b, vdupq_n_f32(vgetq_lane_f32::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)"] @@ -10239,7 +8722,7 @@ pub fn vfmaq_laneq_f64( c: float64x2_t, ) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } + vfmaq_f64(a, b, vdupq_n_f64(vgetq_lane_f64::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64)"] @@ -10254,13 +8737,13 @@ pub fn vfma_lane_f64( c: float64x1_t, ) -> float64x1_t { static_assert!(LANE == 0); - unsafe { vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } + vfma_f64(a, b, vdup_n_f64(vget_lane_f64::(c))) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfma_laneq_f64( @@ -10269,7 +8752,7 @@ pub fn vfma_laneq_f64( c: float64x2_t, ) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } + vfma_f64(a, b, vdup_n_f64(vgetq_lane_f64::(c))) } #[doc = "Floating-point fused Multiply-Subtract from accumulator."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f16)"] @@ -10309,10 +8792,8 @@ pub fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64 { static_assert!(LANE == 0); - unsafe { - let c: f64 = simd_extract!(c, LANE as u32); - fmaf64(b, c, a) - } + let c: f64 = vget_lane_f64::(c); + fmaf64(b, c, a) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_f16)"] @@ -10327,32 +8808,28 @@ pub fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 { #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_lane_f16)"] #[inline] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vfmah_lane_f16(a: f16, b: f16, v: float16x4_t) -> f16 { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: f16 = simd_extract!(v, LANE as u32); - vfmah_f16(a, b, c) - } + let c: f16 = vget_lane_f16::(v); + vfmah_f16(a, b, c) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_laneq_f16)"] #[inline] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vfmah_laneq_f16(a: f16, b: f16, v: float16x8_t) -> f16 { static_assert_uimm_bits!(LANE, 3); - unsafe { - let c: f16 = simd_extract!(v, LANE as u32); - vfmah_f16(a, b, c) - } + let c: f16 = vgetq_lane_f16::(v); + vfmah_f16(a, b, c) } #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"] @@ -10376,7 +8853,7 @@ pub fn vfmaq_lane_f64( c: float64x1_t, ) -> float64x2_t { static_assert!(LANE == 0); - unsafe { vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } + vfmaq_f64(a, b, vdupq_n_f64(vget_lane_f64::(c))) } #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64)"] @@ -10391,47 +8868,42 @@ pub fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmas_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32 { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: f32 = simd_extract!(c, LANE as u32); - fmaf32(b, c, a) - } + let c: f32 = vget_lane_f32::(c); + fmaf32(b, c, a) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmas_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32 { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: f32 = simd_extract!(c, LANE as u32); - fmaf32(b, c, a) - } + let c: f32 = vgetq_lane_f32::(c); + fmaf32(b, c, a) } #[doc = "Floating-point fused multiply-add to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: f64 = simd_extract!(c, LANE as u32); - fmaf64(b, c, a) - } + let c: f64 = vgetq_lane_f64::(c); + fmaf64(b, c, a) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10448,8 +8920,34 @@ pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3 unsafe { _vfmlal_high_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal2))] +pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v2f32.v4f16" + )] + fn _vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlal_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10465,6 +8963,31 @@ pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float } unsafe { _vfmlalq_high_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal2))] +pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal2.v4f32.v8f16" + )] + fn _vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlalq_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_high_f16)"] #[inline] @@ -10480,7 +9003,7 @@ pub fn vfmlal_lane_high_f16( b: float16x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlal_high_f16(r, a, vdup_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_high_f16)"] @@ -10497,7 +9020,7 @@ pub fn vfmlal_laneq_high_f16( b: float16x8_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlal_high_f16(r, a, vdup_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_high_f16)"] @@ -10514,7 +9037,7 @@ pub fn vfmlalq_lane_high_f16( b: float16x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlalq_high_f16(r, a, vdupq_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_high_f16)"] @@ -10531,7 +9054,7 @@ pub fn vfmlalq_laneq_high_f16( b: float16x8_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlalq_high_f16(r, a, vdupq_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_low_f16)"] @@ -10548,7 +9071,7 @@ pub fn vfmlal_lane_low_f16( b: float16x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlal_low_f16(r, a, vdup_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_low_f16)"] @@ -10565,7 +9088,7 @@ pub fn vfmlal_laneq_low_f16( b: float16x8_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlal_low_f16(r, a, vdup_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_low_f16)"] @@ -10582,7 +9105,7 @@ pub fn vfmlalq_lane_low_f16( b: float16x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlalq_low_f16(r, a, vdupq_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_low_f16)"] @@ -10599,11 +9122,12 @@ pub fn vfmlalq_laneq_low_f16( b: float16x8_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlalq_low_f16(r, a, vdupq_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10620,8 +9144,34 @@ pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32 unsafe { _vfmlal_low_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal))] +pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v2f32.v4f16" + )] + fn _vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlal_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10637,9 +9187,35 @@ pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3 } unsafe { _vfmlalq_low_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlal))] +pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlal.v4f32.v8f16" + )] + fn _vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlalq_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10656,22 +9232,73 @@ pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3 unsafe { _vfmlsl_high_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fmlsl2))] -pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { +pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16" + link_name = "llvm.aarch64.neon.fmlsl2.v2f32.v4f16" )] - fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + fn _vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlsl_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16" + )] + fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { _vfmlslq_high_f16(r, a, b) } +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl2))] +pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16" + )] + fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlslq_high_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vfmlslq_high_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_high_f16)"] @@ -10688,7 +9315,7 @@ pub fn vfmlsl_lane_high_f16( b: float16x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlsl_high_f16(r, a, vdup_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_high_f16)"] @@ -10705,7 +9332,7 @@ pub fn vfmlsl_laneq_high_f16( b: float16x8_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlsl_high_f16(r, a, vdup_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_high_f16)"] @@ -10722,7 +9349,7 @@ pub fn vfmlslq_lane_high_f16( b: float16x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlslq_high_f16(r, a, vdupq_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_high_f16)"] @@ -10739,7 +9366,7 @@ pub fn vfmlslq_laneq_high_f16( b: float16x8_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlslq_high_f16(r, a, vdupq_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_low_f16)"] @@ -10756,7 +9383,7 @@ pub fn vfmlsl_lane_low_f16( b: float16x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlsl_low_f16(r, a, vdup_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_low_f16)"] @@ -10773,7 +9400,7 @@ pub fn vfmlsl_laneq_low_f16( b: float16x8_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) } + vfmlsl_low_f16(r, a, vdup_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_low_f16)"] @@ -10790,7 +9417,7 @@ pub fn vfmlslq_lane_low_f16( b: float16x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlslq_low_f16(r, a, vdupq_lane_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_low_f16)"] @@ -10807,11 +9434,12 @@ pub fn vfmlslq_laneq_low_f16( b: float16x8_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) } + vfmlslq_low_f16(r, a, vdupq_laneq_f16::(b)) } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10828,8 +9456,34 @@ pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32 unsafe { _vfmlsl_low_f16(r, a, b) } } #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl))] +pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v2f32.v4f16" + )] + fn _vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t; + } + unsafe { + let r: float32x2_t = simd_shuffle!(r, r, [1, 0]); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x2_t = _vfmlsl_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] @@ -10845,6 +9499,31 @@ pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3 } unsafe { _vfmlslq_low_f16(r, a, b) } } +#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmlsl))] +pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmlsl.v4f32.v8f16" + )] + fn _vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t; + } + unsafe { + let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]); + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float32x4_t = _vfmlslq_low_f16(r, a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"] #[inline] @@ -10871,7 +9550,7 @@ pub fn vfms_lane_f16( c: float16x4_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) } + vfms_f16(a, b, vdup_n_f16(vget_lane_f16::(c))) } #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f16)"] @@ -10887,7 +9566,7 @@ pub fn vfms_laneq_f16( c: float16x8_t, ) -> float16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) } + vfms_f16(a, b, vdup_n_f16(vgetq_lane_f16::(c))) } #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f16)"] @@ -10903,7 +9582,7 @@ pub fn vfmsq_lane_f16( c: float16x4_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) } + vfmsq_f16(a, b, vdupq_n_f16(vget_lane_f16::(c))) } #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f16)"] @@ -10919,7 +9598,7 @@ pub fn vfmsq_laneq_f16( c: float16x8_t, ) -> float16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) } + vfmsq_f16(a, b, vdupq_n_f16(vgetq_lane_f16::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"] @@ -10934,7 +9613,7 @@ pub fn vfms_lane_f32( c: float32x2_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } + vfms_f32(a, b, vdup_n_f32(vget_lane_f32::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)"] @@ -10949,7 +9628,7 @@ pub fn vfms_laneq_f32( c: float32x4_t, ) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) } + vfms_f32(a, b, vdup_n_f32(vgetq_lane_f32::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)"] @@ -10964,7 +9643,7 @@ pub fn vfmsq_lane_f32( c: float32x2_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } + vfmsq_f32(a, b, vdupq_n_f32(vget_lane_f32::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)"] @@ -10979,7 +9658,7 @@ pub fn vfmsq_laneq_f32( c: float32x4_t, ) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) } + vfmsq_f32(a, b, vdupq_n_f32(vgetq_lane_f32::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)"] @@ -10994,7 +9673,7 @@ pub fn vfmsq_laneq_f64( c: float64x2_t, ) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } + vfmsq_f64(a, b, vdupq_n_f64(vgetq_lane_f64::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)"] @@ -11009,13 +9688,13 @@ pub fn vfms_lane_f64( c: float64x1_t, ) -> float64x1_t { static_assert!(LANE == 0); - unsafe { vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } + vfms_f64(a, b, vdup_n_f64(vget_lane_f64::(c))) } #[doc = "Floating-point fused multiply-subtract to accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfms_laneq_f64( @@ -11024,7 +9703,7 @@ pub fn vfms_laneq_f64( c: float64x2_t, ) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) } + vfms_f64(a, b, vdup_n_f64(vgetq_lane_f64::(c))) } #[doc = "Floating-point fused Multiply-Subtract from accumulator."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f16)"] @@ -11068,32 +9747,28 @@ pub fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 { #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_lane_f16)"] #[inline] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vfmsh_lane_f16(a: f16, b: f16, v: float16x4_t) -> f16 { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: f16 = simd_extract!(v, LANE as u32); - vfmsh_f16(a, b, c) - } + let c: f16 = vget_lane_f16::(v); + vfmsh_f16(a, b, c) } #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_laneq_f16)"] #[inline] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vfmsh_laneq_f16(a: f16, b: f16, v: float16x8_t) -> f16 { static_assert_uimm_bits!(LANE, 3); - unsafe { - let c: f16 = simd_extract!(v, LANE as u32); - vfmsh_f16(a, b, c) - } + let c: f16 = vgetq_lane_f16::(v); + vfmsh_f16(a, b, c) } #[doc = "Floating-point fused multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"] @@ -11120,7 +9795,7 @@ pub fn vfmsq_lane_f64( c: float64x1_t, ) -> float64x2_t { static_assert!(LANE == 0); - unsafe { vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) } + vfmsq_f64(a, b, vdupq_n_f64(vget_lane_f64::(c))) } #[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)"] @@ -11135,7 +9810,7 @@ pub fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmss_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32 { @@ -11145,7 +9820,7 @@ pub fn vfmss_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmss_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32 { @@ -11155,7 +9830,7 @@ pub fn vfmss_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmsd_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64 { @@ -11165,12 +9840,85 @@ pub fn vfmsd_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vfmsd_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { vfmad_laneq_f64::(a, -b, c) } +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f64(a: float64x2_t) -> float64x1_t { + unsafe { float64x1_t([simd_extract!(a, 1)]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f64(a: float64x2_t) -> float64x1_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + float64x1_t([simd_extract!(a, 1)]) + } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f64(a: float64x2_t) -> float64x1_t { + unsafe { float64x1_t([simd_extract!(a, 0)]) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f64(a: float64x2_t) -> float64x1_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + float64x1_t([simd_extract!(a, 0)]) + } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +pub fn vgetq_lane_f64(a: float64x2_t) -> f64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(a, IMM5 as u32) } +} +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +pub fn vgetq_lane_f64(a: float64x2_t) -> f64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + simd_extract!(a, IMM5 as u32) + } +} #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] #[doc = "## Safety"] @@ -11683,7 +10431,6 @@ pub unsafe fn vld2_lane_u64(a: *const u64, b: uint64x1x2_t) -> #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld2r))] @@ -11691,47 +10438,16 @@ pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t { transmute(vld2q_dup_s64(transmute(a))) } #[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t { - let mut ret_val: poly64x2x2_t = transmute(vld2q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val -} -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld2r))] pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t { transmute(vld2q_dup_s64(transmute(a))) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t { - let mut ret_val: uint64x2x2_t = transmute(vld2q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val -} #[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"] #[doc = "## Safety"] @@ -11886,7 +10602,6 @@ pub unsafe fn vld2q_lane_p8(a: *const p8, b: poly8x16x2_t) -> p #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld2))] @@ -11894,21 +10609,6 @@ pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t { transmute(vld2q_s64(transmute(a))) } #[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t { - let mut ret_val: poly64x2x2_t = transmute(vld2q_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val -} -#[doc = "Load multiple 2-element structures to two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] @@ -12067,7 +10767,6 @@ pub unsafe fn vld3_lane_u64(a: *const u64, b: uint64x1x3_t) -> #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3r))] @@ -12075,49 +10774,16 @@ pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t { transmute(vld3q_dup_s64(transmute(a))) } #[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t { - let mut ret_val: poly64x2x3_t = transmute(vld3q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3r))] pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t { transmute(vld3q_dup_s64(transmute(a))) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t { - let mut ret_val: uint64x2x3_t = transmute(vld3q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val -} #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"] #[doc = "## Safety"] @@ -12275,7 +10941,6 @@ pub unsafe fn vld3q_lane_p8(a: *const p8, b: poly8x16x3_t) -> p #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3))] @@ -12283,22 +10948,6 @@ pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t { transmute(vld3q_s64(transmute(a))) } #[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t { - let mut ret_val: poly64x2x3_t = transmute(vld3q_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val -} -#[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] @@ -12459,7 +11108,6 @@ pub unsafe fn vld4_lane_u64(a: *const u64, b: uint64x1x4_t) -> #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -12467,51 +11115,16 @@ pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t { transmute(vld4q_dup_s64(transmute(a))) } #[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t { - let mut ret_val: poly64x2x4_t = transmute(vld4q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val -} -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t { transmute(vld4q_dup_s64(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t { - let mut ret_val: uint64x2x4_t = transmute(vld4q_dup_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val -} #[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"] #[doc = "## Safety"] @@ -12672,7 +11285,6 @@ pub unsafe fn vld4q_lane_p8(a: *const p8, b: poly8x16x4_t) -> p #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[target_feature(enable = "neon,aes")] #[cfg_attr(test, assert_instr(ld4))] @@ -12680,23 +11292,6 @@ pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t { transmute(vld4q_s64(transmute(a))) } #[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t { - let mut ret_val: poly64x2x4_t = transmute(vld4q_s64(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val -} -#[doc = "Load multiple 4-element structures to four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] @@ -14210,18 +12805,18 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_s16(a, b, vdupq_lane_s16::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_laneq_s16( @@ -14230,24 +12825,24 @@ pub fn vmlal_high_laneq_s16( c: int16x8_t, ) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_s16(a, b, vdupq_laneq_s16::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_s32(a, b, vdupq_lane_s32::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_laneq_s32( @@ -14256,13 +12851,13 @@ pub fn vmlal_high_laneq_s32( c: int32x4_t, ) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_s32(a, b, vdupq_laneq_s32::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_u16( @@ -14271,13 +12866,13 @@ pub fn vmlal_high_lane_u16( c: uint16x4_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_u16(a, b, vdupq_lane_u16::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_laneq_u16( @@ -14286,13 +12881,13 @@ pub fn vmlal_high_laneq_u16( c: uint16x8_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_u16(a, b, vdupq_laneq_u16::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_u32( @@ -14301,13 +12896,13 @@ pub fn vmlal_high_lane_u32( c: uint32x2_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_u32(a, b, vdupq_lane_u32::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_laneq_u32( @@ -14316,13 +12911,13 @@ pub fn vmlal_high_laneq_u32( c: uint32x4_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_u32(a, b, vdupq_laneq_u32::(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { vmlal_high_s16(a, b, vdupq_n_s16(c)) @@ -14331,7 +12926,7 @@ pub fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { vmlal_high_s32(a, b, vdupq_n_s32(c)) @@ -14340,7 +12935,7 @@ pub fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { vmlal_high_u16(a, b, vdupq_n_u16(c)) @@ -14349,7 +12944,7 @@ pub fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { vmlal_high_u32(a, b, vdupq_n_u32(c)) @@ -14358,79 +12953,67 @@ pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - unsafe { - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlal_s8(a, b, c) - } + let b = vget_high_s8(b); + let c = vget_high_s8(c); + vmlal_s8(a, b, c) } #[doc = "Signed multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - unsafe { - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlal_s16(a, b, c) - } + let b = vget_high_s16(b); + let c = vget_high_s16(c); + vmlal_s16(a, b, c) } #[doc = "Signed multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: int32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlal_s32(a, b, c) - } + let b = vget_high_s32(b); + let c = vget_high_s32(c); + vmlal_s32(a, b, c) } #[doc = "Unsigned multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - unsafe { - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlal_u8(a, b, c) - } + let b = vget_high_u8(b); + let c = vget_high_u8(c); + vmlal_u8(a, b, c) } #[doc = "Unsigned multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - unsafe { - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlal_u16(a, b, c) - } + let b = vget_high_u16(b); + let c = vget_high_u16(c); + vmlal_u16(a, b, c) } #[doc = "Unsigned multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - unsafe { - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlal_u32(a, b, c) - } + let b = vget_high_u32(b); + let c = vget_high_u32(c); + vmlal_u32(a, b, c) } #[doc = "Floating-point multiply-subtract from accumulator"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64)"] @@ -14454,18 +13037,18 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_s16(a, b, vdupq_lane_s16::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_laneq_s16( @@ -14474,24 +13057,24 @@ pub fn vmlsl_high_laneq_s16( c: int16x8_t, ) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_s16(a, b, vdupq_laneq_s16::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_s32(a, b, vdupq_lane_s32::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_laneq_s32( @@ -14500,13 +13083,13 @@ pub fn vmlsl_high_laneq_s32( c: int32x4_t, ) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_s32(a, b, vdupq_laneq_s32::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_u16( @@ -14515,13 +13098,13 @@ pub fn vmlsl_high_lane_u16( c: uint16x4_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_u16(a, b, vdupq_lane_u16::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_laneq_u16( @@ -14530,13 +13113,13 @@ pub fn vmlsl_high_laneq_u16( c: uint16x8_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_u16(a, b, vdupq_laneq_u16::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_u32( @@ -14545,13 +13128,13 @@ pub fn vmlsl_high_lane_u32( c: uint32x2_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_u32(a, b, vdupq_lane_u32::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_laneq_u32( @@ -14560,13 +13143,13 @@ pub fn vmlsl_high_laneq_u32( c: uint32x4_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_u32(a, b, vdupq_laneq_u32::(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { vmlsl_high_s16(a, b, vdupq_n_s16(c)) @@ -14575,7 +13158,7 @@ pub fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { vmlsl_high_s32(a, b, vdupq_n_s32(c)) @@ -14584,7 +13167,7 @@ pub fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { vmlsl_high_u16(a, b, vdupq_n_u16(c)) @@ -14593,7 +13176,7 @@ pub fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { vmlsl_high_u32(a, b, vdupq_n_u32(c)) @@ -14602,223 +13185,181 @@ pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - unsafe { - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlsl_s8(a, b, c) - } + let b = vget_high_s8(b); + let c = vget_high_s8(c); + vmlsl_s8(a, b, c) } #[doc = "Signed multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - unsafe { - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlsl_s16(a, b, c) - } + let b = vget_high_s16(b); + let c = vget_high_s16(c); + vmlsl_s16(a, b, c) } #[doc = "Signed multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: int32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlsl_s32(a, b, c) - } + let b = vget_high_s32(b); + let c = vget_high_s32(c); + vmlsl_s32(a, b, c) } #[doc = "Unsigned multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - unsafe { - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlsl_u8(a, b, c) - } + let b = vget_high_u8(b); + let c = vget_high_u8(c); + vmlsl_u8(a, b, c) } #[doc = "Unsigned multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - unsafe { - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlsl_u16(a, b, c) - } + let b = vget_high_u16(b); + let c = vget_high_u16(c); + vmlsl_u16(a, b, c) } #[doc = "Unsigned multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - unsafe { - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlsl_u32(a, b, c) - } + let b = vget_high_u32(b); + let c = vget_high_u32(c); + vmlsl_u32(a, b, c) } #[doc = "Vector move"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sxtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))] pub fn vmovl_high_s8(a: int8x16_t) -> int16x8_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vmovl_s8(a) - } + let a = vget_high_s8(a); + vmovl_s8(a) } #[doc = "Vector move"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sxtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))] pub fn vmovl_high_s16(a: int16x8_t) -> int32x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vmovl_s16(a) - } + let a = vget_high_s16(a); + vmovl_s16(a) } #[doc = "Vector move"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sxtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))] pub fn vmovl_high_s32(a: int32x4_t) -> int64x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - vmovl_s32(a) - } + let a = vget_high_s32(a); + vmovl_s32(a) } #[doc = "Vector move"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(uxtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))] pub fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vmovl_u8(a) - } + let a = vget_high_u8(a); + vmovl_u8(a) } #[doc = "Vector move"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(uxtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))] pub fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vmovl_u16(a) - } + let a = vget_high_u16(a); + vmovl_u16(a) } #[doc = "Vector move"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(uxtl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))] pub fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - vmovl_u32(a) - } + let a = vget_high_u32(a); + vmovl_u32(a) } #[doc = "Extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))] pub fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - unsafe { - let c: int8x8_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) - } + unsafe { vcombine_s8(a, simd_cast(b)) } } #[doc = "Extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))] pub fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - unsafe { - let c: int16x4_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) - } + unsafe { vcombine_s16(a, simd_cast(b)) } } #[doc = "Extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))] pub fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - unsafe { - let c: int32x2_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3]) - } + unsafe { vcombine_s32(a, simd_cast(b)) } } #[doc = "Extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))] pub fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - unsafe { - let c: uint8x8_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) - } + unsafe { vcombine_u8(a, simd_cast(b)) } } #[doc = "Extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))] pub fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - unsafe { - let c: uint16x4_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) - } + unsafe { vcombine_u16(a, simd_cast(b)) } } #[doc = "Extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))] pub fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - unsafe { - let c: uint32x2_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3]) - } + unsafe { vcombine_u32(a, simd_cast(b)) } } #[doc = "Multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64)"] @@ -14847,7 +13388,7 @@ pub fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - unsafe { simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } + unsafe { simd_mul(a, transmute::(vget_lane_f64::(b))) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f16)"] @@ -14859,7 +13400,7 @@ pub fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64 #[cfg(not(target_arch = "arm64ec"))] pub fn vmul_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + unsafe { simd_mul(a, vdup_laneq_f16::(b)) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"] @@ -14871,7 +13412,7 @@ pub fn vmul_laneq_f16(a: float16x4_t, b: float16x8_t) -> float1 #[cfg(not(target_arch = "arm64ec"))] pub fn vmulq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + unsafe { simd_mul(a, vdupq_laneq_f16::(b)) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"] @@ -14882,7 +13423,7 @@ pub fn vmulq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmul_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } + unsafe { simd_mul(a, transmute::(vgetq_lane_f64::(b))) } } #[doc = "Vector multiply by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64)"] @@ -14911,10 +13452,8 @@ pub fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { static_assert!(LANE == 0); - unsafe { - let b: f64 = simd_extract!(b, LANE as u32); - a * b - } + let b: f64 = vget_lane_f64::(b); + a * b } #[doc = "Add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_f16)"] @@ -14936,10 +13475,8 @@ pub fn vmulh_f16(a: f16, b: f16) -> f16 { #[cfg(not(target_arch = "arm64ec"))] pub fn vmulh_lane_f16(a: f16, b: float16x4_t) -> f16 { static_assert_uimm_bits!(LANE, 2); - unsafe { - let b: f16 = simd_extract!(b, LANE as u32); - a * b - } + let b: f16 = vget_lane_f16::(b); + a * b } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_laneq_f16)"] @@ -14951,104 +13488,102 @@ pub fn vmulh_lane_f16(a: f16, b: float16x4_t) -> f16 { #[cfg(not(target_arch = "arm64ec"))] pub fn vmulh_laneq_f16(a: f16, b: float16x8_t) -> f16 { static_assert_uimm_bits!(LANE, 3); - unsafe { - let b: f16 = simd_extract!(b, LANE as u32); - a * b - } + let b: f16 = vgetq_lane_f16::(b); + a * b } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_s16(a, vdupq_lane_s16::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_s16(a, vdupq_laneq_s16::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_s32(a, vdupq_lane_s32::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_s32(a, vdupq_laneq_s32::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_u16(a, vdupq_lane_u16::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_u16(a, vdupq_laneq_u16::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_u32(a, vdupq_lane_u32::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_u32(a, vdupq_laneq_u32::(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { vmull_high_s16(a, vdupq_n_s16(b)) @@ -15057,7 +13592,7 @@ pub fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { vmull_high_s32(a, vdupq_n_s32(b)) @@ -15066,7 +13601,7 @@ pub fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t { vmull_high_u16(a, vdupq_n_u16(b)) @@ -15075,7 +13610,7 @@ pub fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t { vmull_high_u32(a, vdupq_n_u32(b)) @@ -15085,100 +13620,86 @@ pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t { #[inline] #[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(pmull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))] pub fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 { - unsafe { vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) } + vmull_p64(vgetq_lane_p64::<1>(a), vgetq_lane_p64::<1>(b)) } #[doc = "Polynomial multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(pmull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))] pub fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - vmull_p8(a, b) - } + let a = vget_high_p8(a); + let b = vget_high_p8(b); + vmull_p8(a, b) } #[doc = "Signed multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(smull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))] pub fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - vmull_s8(a, b) - } + let a = vget_high_s8(a); + let b = vget_high_s8(b); + vmull_s8(a, b) } #[doc = "Signed multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(smull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))] pub fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - vmull_s16(a, b) - } + let a = vget_high_s16(a); + let b = vget_high_s16(b); + vmull_s16(a, b) } #[doc = "Signed multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(smull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))] pub fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); - vmull_s32(a, b) - } + let a = vget_high_s32(a); + let b = vget_high_s32(b); + vmull_s32(a, b) } #[doc = "Unsigned multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(umull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))] pub fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - vmull_u8(a, b) - } + let a = vget_high_u8(a); + let b = vget_high_u8(b); + vmull_u8(a, b) } #[doc = "Unsigned multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(umull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))] pub fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - vmull_u16(a, b) - } + let a = vget_high_u16(a); + let b = vget_high_u16(b); + vmull_u16(a, b) } #[doc = "Unsigned multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(umull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))] pub fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - vmull_u32(a, b) - } + let a = vget_high_u32(a); + let b = vget_high_u32(b); + vmull_u32(a, b) } #[doc = "Polynomial multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)"] @@ -15205,7 +13726,7 @@ pub fn vmull_p64(a: p64, b: p64) -> p128 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert!(LANE == 0); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) } + unsafe { simd_mul(a, vdupq_lane_f64::(b)) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"] @@ -15216,7 +13737,7 @@ pub fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float6 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) } + unsafe { simd_mul(a, vdupq_laneq_f64::(b)) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"] @@ -15227,10 +13748,8 @@ pub fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32 { static_assert_uimm_bits!(LANE, 1); - unsafe { - let b: f32 = simd_extract!(b, LANE as u32); - a * b - } + let b: f32 = vget_lane_f32::(b); + a * b } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32)"] @@ -15241,10 +13760,8 @@ pub fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32 { static_assert_uimm_bits!(LANE, 2); - unsafe { - let b: f32 = simd_extract!(b, LANE as u32); - a * b - } + let b: f32 = vgetq_lane_f32::(b); + a * b } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)"] @@ -15255,10 +13772,8 @@ pub fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64 { static_assert_uimm_bits!(LANE, 1); - unsafe { - let b: f64 = simd_extract!(b, LANE as u32); - a * b - } + let b: f64 = vgetq_lane_f64::(b); + a * b } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f16)"] @@ -15368,7 +13883,7 @@ pub fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[cfg(not(target_arch = "arm64ec"))] pub fn vmulx_lane_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmulx_f16(a, vdup_lane_f16::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"] @@ -15380,7 +13895,7 @@ pub fn vmulx_lane_f16(a: float16x4_t, b: float16x4_t) -> float1 #[cfg(not(target_arch = "arm64ec"))] pub fn vmulx_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmulx_f16(a, vdup_laneq_f16::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"] @@ -15392,7 +13907,7 @@ pub fn vmulx_laneq_f16(a: float16x4_t, b: float16x8_t) -> float #[cfg(not(target_arch = "arm64ec"))] pub fn vmulxq_lane_f16(a: float16x8_t, b: float16x4_t) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmulxq_f16(a, vdupq_lane_f16::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"] @@ -15404,7 +13919,7 @@ pub fn vmulxq_lane_f16(a: float16x8_t, b: float16x4_t) -> float #[cfg(not(target_arch = "arm64ec"))] pub fn vmulxq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmulxq_f16(a, vdupq_laneq_f16::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"] @@ -15415,7 +13930,7 @@ pub fn vmulxq_laneq_f16(a: float16x8_t, b: float16x8_t) -> floa #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) } + vmulx_f32(a, vdup_lane_f32::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"] @@ -15426,7 +13941,7 @@ pub fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float3 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) } + vmulx_f32(a, vdup_laneq_f32::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"] @@ -15437,7 +13952,7 @@ pub fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmulxq_f32(a, vdupq_lane_f32::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"] @@ -15448,7 +13963,7 @@ pub fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmulxq_f32(a, vdupq_laneq_f32::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"] @@ -15459,7 +13974,7 @@ pub fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> floa #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) } + vmulxq_f64(a, vdupq_laneq_f64::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"] @@ -15470,7 +13985,7 @@ pub fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> floa #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - unsafe { vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } + unsafe { vmulx_f64(a, transmute(vget_lane_f64::(b))) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64)"] @@ -15481,7 +13996,7 @@ pub fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> float6 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } + unsafe { vmulx_f64(a, transmute(vgetq_lane_f64::(b))) } } #[doc = "Vector multiply by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_n_f16)"] @@ -15544,7 +14059,7 @@ pub fn vmulxs_f32(a: f32, b: f32) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64 { static_assert!(LANE == 0); - unsafe { vmulxd_f64(a, simd_extract!(b, LANE as u32)) } + vmulxd_f64(a, vget_lane_f64::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64)"] @@ -15555,7 +14070,7 @@ pub fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxd_laneq_f64(a: f64, b: float64x2_t) -> f64 { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulxd_f64(a, simd_extract!(b, LANE as u32)) } + vmulxd_f64(a, vgetq_lane_f64::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32)"] @@ -15566,7 +14081,7 @@ pub fn vmulxd_laneq_f64(a: f64, b: float64x2_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxs_lane_f32(a: f32, b: float32x2_t) -> f32 { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulxs_f32(a, simd_extract!(b, LANE as u32)) } + vmulxs_f32(a, vget_lane_f32::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32)"] @@ -15577,7 +14092,7 @@ pub fn vmulxs_lane_f32(a: f32, b: float32x2_t) -> f32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32 { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulxs_f32(a, simd_extract!(b, LANE as u32)) } + vmulxs_f32(a, vgetq_lane_f32::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_f16)"] @@ -15606,7 +14121,7 @@ pub fn vmulxh_f16(a: f16, b: f16) -> f16 { #[cfg(not(target_arch = "arm64ec"))] pub fn vmulxh_lane_f16(a: f16, b: float16x4_t) -> f16 { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) } + vmulxh_f16(a, vget_lane_f16::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_laneq_f16)"] @@ -15618,7 +14133,7 @@ pub fn vmulxh_lane_f16(a: f16, b: float16x4_t) -> f16 { #[cfg(not(target_arch = "arm64ec"))] pub fn vmulxh_laneq_f16(a: f16, b: float16x8_t) -> f16 { static_assert_uimm_bits!(LANE, 3); - unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) } + vmulxh_f16(a, vgetq_lane_f16::(b)) } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"] @@ -15629,7 +14144,7 @@ pub fn vmulxh_laneq_f16(a: f16, b: float16x8_t) -> f16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert!(LANE == 0); - unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) } + vmulxq_f64(a, vdupq_lane_f64::(b)) } #[doc = "Negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"] @@ -15693,11 +14208,9 @@ pub fn vnegh_f16(a: f16) -> f16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vpaddd_f64(a: float64x2_t) -> f64 { - unsafe { - let a1: f64 = simd_extract!(a, 0); - let a2: f64 = simd_extract!(a, 1); - a1 + a2 - } + let a1: f64 = vgetq_lane_f64::<0>(a); + let a2: f64 = vgetq_lane_f64::<1>(a); + a1 + a2 } #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32)"] @@ -15706,11 +14219,9 @@ pub fn vpaddd_f64(a: float64x2_t) -> f64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vpadds_f32(a: float32x2_t) -> f32 { - unsafe { - let a1: f32 = simd_extract!(a, 0); - let a2: f32 = simd_extract!(a, 1); - a1 + a2 - } + let a1: f32 = vget_lane_f32::<0>(a); + let a2: f32 = vget_lane_f32::<1>(a); + a1 + a2 } #[doc = "Add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"] @@ -15733,6 +14244,7 @@ pub fn vpaddd_u64(a: uint64x2_t) -> u64 { #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -15745,8 +14257,27 @@ pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(faddp))] +pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + let ret_val: float16x8_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(faddp))] @@ -15758,8 +14289,26 @@ pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(faddp))] +pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let ret_val: float32x4_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(faddp))] @@ -15770,9 +14319,27 @@ pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { simd_add(even, odd) } } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(faddp))] +pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + let ret_val: float64x2_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15784,8 +14351,32 @@ pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { } } #[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + let ret_val: int8x16_t = simd_add(even, odd); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15797,8 +14388,26 @@ pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { } } #[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + let ret_val: int16x8_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15810,8 +14419,26 @@ pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { } } #[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let ret_val: int32x4_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15823,8 +14450,26 @@ pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { } } #[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + let ret_val: int64x2_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15836,8 +14481,32 @@ pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { } } #[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + let ret_val: uint8x16_t = simd_add(even, odd); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15849,8 +14518,26 @@ pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { } } #[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + let ret_val: uint16x8_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] @@ -15862,21 +14549,57 @@ pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { } } #[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { +pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { - let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); - let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); - simd_add(even, odd) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + let ret_val: uint32x4_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Floating-point add pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] -#[inline] +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + simd_add(even, odd) + } +} +#[doc = "Add Pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(addp))] +pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + let ret_val: uint64x2_t = simd_add(even, odd); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] +#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -15892,8 +14615,32 @@ pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpmax_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v4f16" + )] + fn _vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpmax_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -15909,8 +14656,32 @@ pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vpmaxq_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v8f16" + )] + fn _vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpmaxq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -15926,8 +14697,32 @@ pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpmaxnm_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f16" + )] + fn _vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpmaxnm_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -15942,9 +14737,33 @@ pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { } unsafe { _vpmaxnmq_f16(a, b) } } +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v8f16" + )] + fn _vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpmaxnmq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} #[doc = "Floating-point Maximum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -15959,8 +14778,31 @@ pub fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { _vpmaxnm_f32(a, b) } } #[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v2f32" + )] + fn _vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpmaxnm_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point Maximum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -15975,8 +14817,31 @@ pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpmaxnmq_f32(a, b) } } #[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f32" + )] + fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpmaxnmq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point Maximum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -15990,9 +14855,32 @@ pub fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } unsafe { _vpmaxnmq_f64(a, b) } } +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v2f64" + )] + fn _vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpmaxnmq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point maximum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16007,8 +14895,29 @@ pub fn vpmaxnmqd_f64(a: float64x2_t) -> f64 { unsafe { _vpmaxnmqd_f64(a) } } #[doc = "Floating-point maximum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnmqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64" + )] + fn _vpmaxnmqd_f64(a: float64x2_t) -> f64; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + _vpmaxnmqd_f64(a) + } +} +#[doc = "Floating-point maximum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16022,9 +14931,30 @@ pub fn vpmaxnms_f32(a: float32x2_t) -> f32 { } unsafe { _vpmaxnms_f32(a) } } +#[doc = "Floating-point maximum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpmaxnms_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32" + )] + fn _vpmaxnms_f32(a: float32x2_t) -> f32; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + _vpmaxnms_f32(a) + } +} #[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fmaxp))] @@ -16039,44 +14969,119 @@ pub fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpmaxq_f32(a, b) } } #[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fmaxp))] -pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +pub fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxp.v2f64" + link_name = "llvm.aarch64.neon.fmaxp.v4f32" )] - fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + fn _vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpmaxq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vpmaxq_f64(a, b) } } #[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(smaxp))] -pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v16i8" + link_name = "llvm.aarch64.neon.fmaxp.v2f64" )] - fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; } - unsafe { _vpmaxq_s8(a, b) } + unsafe { _vpmaxq_f64(a, b) } } #[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(smaxp))] -pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(test, assert_instr(fmaxp))] +pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxp.v2f64" + )] + fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpmaxq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v16i8" + )] + fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vpmaxq_s8(a, b) } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v16i8" + )] + fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = _vpmaxq_s8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), @@ -16087,8 +15092,31 @@ pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { _vpmaxq_s16(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v8i16" + )] + fn _vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = _vpmaxq_s16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(smaxp))] @@ -16103,8 +15131,31 @@ pub fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { _vpmaxq_s32(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(smaxp))] +pub fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smaxp.v4i32" + )] + fn _vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = _vpmaxq_s32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(umaxp))] @@ -16119,8 +15170,37 @@ pub fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { _vpmaxq_u8(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(umaxp))] +pub fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v16i8" + )] + fn _vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = _vpmaxq_u8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(umaxp))] @@ -16135,8 +15215,31 @@ pub fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { _vpmaxq_u16(a, b) } } #[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(umaxp))] +pub fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v8i16" + )] + fn _vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = _vpmaxq_u16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding maximum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(umaxp))] @@ -16150,6 +15253,28 @@ pub fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { } unsafe { _vpmaxq_u32(a, b) } } +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(umaxp))] +pub fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v4i32" + )] + fn _vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vpmaxq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Floating-point maximum pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"] #[inline] @@ -16185,6 +15310,7 @@ pub fn vpmaxs_f32(a: float32x2_t) -> f32 { #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -16200,8 +15326,32 @@ pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpmin_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v4f16" + )] + fn _vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpmin_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -16217,8 +15367,32 @@ pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { _vpminq_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v8f16" + )] + fn _vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpminq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -16234,8 +15408,32 @@ pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { _vpminnm_f16(a, b) } } #[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fminnmp))] +pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v4f16" + )] + fn _vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpminnm_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,fp16")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -16250,25 +15448,72 @@ pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { } unsafe { _vpminnmq_f16(a, b) } } -#[doc = "Floating-point Minimum Number Pairwise (vector)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(fminnmp))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnmp.v2f32" + link_name = "llvm.aarch64.neon.fminnmp.v8f16" + )] + fn _vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = _vpminnmq_f16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f32" )] fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } unsafe { _vpminnm_f32(a, b) } } #[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f32" + )] + fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpminnm_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16283,8 +15528,31 @@ pub fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpminnmq_f32(a, b) } } #[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v4f32" + )] + fn _vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpminnmq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point Minimum Number Pairwise (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16298,9 +15566,32 @@ pub fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { } unsafe { _vpminnmq_f64(a, b) } } +#[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f64" + )] + fn _vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpminnmq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} #[doc = "Floating-point minimum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16315,8 +15606,29 @@ pub fn vpminnmqd_f64(a: float64x2_t) -> f64 { unsafe { _vpminnmqd_f64(a) } } #[doc = "Floating-point minimum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnmqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64" + )] + fn _vpminnmqd_f64(a: float64x2_t) -> f64; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + _vpminnmqd_f64(a) + } +} +#[doc = "Floating-point minimum number pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -16330,9 +15642,30 @@ pub fn vpminnms_f32(a: float32x2_t) -> f32 { } unsafe { _vpminnms_f32(a) } } +#[doc = "Floating-point minimum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vpminnms_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32" + )] + fn _vpminnms_f32(a: float32x2_t) -> f32; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + _vpminnms_f32(a) + } +} #[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -16347,8 +15680,31 @@ pub fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { _vpminq_f32(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v4f32" + )] + fn _vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = _vpminq_f32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -16363,8 +15719,31 @@ pub fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { _vpminq_f64(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminp.v2f64" + )] + fn _vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = _vpminq_f64(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sminp))] @@ -16379,8 +15758,37 @@ pub fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { _vpminq_s8(a, b) } } #[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v16i8" + )] + fn _vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = _vpminq_s8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding minimum of adjacent pairs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sminp))] @@ -16395,88 +15803,230 @@ pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { _vpminq_s16(a, b) } } #[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sminp))] -pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v4i32" + link_name = "llvm.aarch64.neon.sminp.v8i16" )] - fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + fn _vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = _vpminq_s16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vpminq_s32(a, b) } } #[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(uminp))] -pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v16i8" + link_name = "llvm.aarch64.neon.sminp.v4i32" )] - fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } - unsafe { _vpminq_u8(a, b) } + unsafe { _vpminq_s32(a, b) } } #[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(uminp))] -pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { +#[cfg_attr(test, assert_instr(sminp))] +pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v8i16" + link_name = "llvm.aarch64.neon.sminp.v4i32" )] - fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = _vpminq_s32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vpminq_u16(a, b) } } #[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(uminp))] -pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v4i32" + link_name = "llvm.aarch64.neon.uminp.v16i8" )] - fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; } - unsafe { _vpminq_u32(a, b) } + unsafe { _vpminq_u8(a, b) } } -#[doc = "Floating-point minimum pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fminp))] -pub fn vpminqd_f64(a: float64x2_t) -> f64 { +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminv.f64.v2f64" + link_name = "llvm.aarch64.neon.uminp.v16i8" + )] + fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = _vpminq_u8(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i16" + )] + fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vpminq_u16(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i16" + )] + fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = _vpminq_u16(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i32" + )] + fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vpminq_u32(a, b) } +} +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(uminp))] +pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i32" + )] + fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vpminq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f64.v2f64" )] fn _vpminqd_f64(a: float64x2_t) -> f64; } unsafe { _vpminqd_f64(a) } } #[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpminqd_f64(a: float64x2_t) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f64.v2f64" + )] + fn _vpminqd_f64(a: float64x2_t) -> f64; + } + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + _vpminqd_f64(a) + } +} +#[doc = "Floating-point minimum pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(fminp))] @@ -16490,6 +16040,26 @@ pub fn vpmins_f32(a: float32x2_t) -> f32 { } unsafe { _vpmins_f32(a) } } +#[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fminp))] +pub fn vpmins_f32(a: float32x2_t) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f32.v2f32" + )] + fn _vpmins_f32(a: float32x2_t) -> f32; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + _vpmins_f32(a) + } +} #[doc = "Signed saturating Absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64)"] #[inline] @@ -16529,7 +16099,7 @@ pub fn vqabsq_s64(a: int64x2_t) -> int64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] pub fn vqabsb_s8(a: i8) -> i8 { - unsafe { simd_extract!(vqabs_s8(vdup_n_s8(a)), 0) } + vget_lane_s8::<0>(vqabs_s8(vdup_n_s8(a))) } #[doc = "Signed saturating absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16)"] @@ -16538,7 +16108,7 @@ pub fn vqabsb_s8(a: i8) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] pub fn vqabsh_s16(a: i16) -> i16 { - unsafe { simd_extract!(vqabs_s16(vdup_n_s16(a)), 0) } + vget_lane_s16::<0>(vqabs_s16(vdup_n_s16(a))) } #[doc = "Signed saturating absolute value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32)"] @@ -16581,7 +16151,7 @@ pub fn vqabsd_s64(a: i64) -> i64 { pub fn vqaddb_s8(a: i8, b: i8) -> i8 { let a: int8x8_t = vdup_n_s8(a); let b: int8x8_t = vdup_n_s8(b); - unsafe { simd_extract!(vqadd_s8(a, b), 0) } + vget_lane_s8::<0>(vqadd_s8(a, b)) } #[doc = "Saturating add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16)"] @@ -16592,7 +16162,7 @@ pub fn vqaddb_s8(a: i8, b: i8) -> i8 { pub fn vqaddh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - unsafe { simd_extract!(vqadd_s16(a, b), 0) } + vget_lane_s16::<0>(vqadd_s16(a, b)) } #[doc = "Saturating add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8)"] @@ -16603,7 +16173,7 @@ pub fn vqaddh_s16(a: i16, b: i16) -> i16 { pub fn vqaddb_u8(a: u8, b: u8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); let b: uint8x8_t = vdup_n_u8(b); - unsafe { simd_extract!(vqadd_u8(a, b), 0) } + vget_lane_u8::<0>(vqadd_u8(a, b)) } #[doc = "Saturating add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16)"] @@ -16614,7 +16184,7 @@ pub fn vqaddb_u8(a: u8, b: u8) -> u8 { pub fn vqaddh_u16(a: u16, b: u16) -> u16 { let a: uint16x4_t = vdup_n_u16(a); let b: uint16x4_t = vdup_n_u16(b); - unsafe { simd_extract!(vqadd_u16(a, b), 0) } + vget_lane_u16::<0>(vqadd_u16(a, b)) } #[doc = "Saturating add"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32)"] @@ -16684,7 +16254,7 @@ pub fn vqaddd_u64(a: u64, b: u64) -> u64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { @@ -16695,7 +16265,7 @@ pub fn vqdmlal_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_laneq_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { @@ -16706,7 +16276,7 @@ pub fn vqdmlal_high_laneq_s16(a: int32x4_t, b: int16x8_t, c: int16 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { @@ -16717,7 +16287,7 @@ pub fn vqdmlal_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_laneq_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { @@ -16728,7 +16298,7 @@ pub fn vqdmlal_high_laneq_s32(a: int64x2_t, b: int32x4_t, c: int32 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { vqaddq_s32(a, vqdmull_high_n_s16(b, c)) @@ -16737,7 +16307,7 @@ pub fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { vqaddq_s32(a, vqdmull_high_s16(b, c)) @@ -16746,7 +16316,7 @@ pub fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { vqaddq_s64(a, vqdmull_high_n_s32(b, c)) @@ -16755,7 +16325,7 @@ pub fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { vqaddq_s64(a, vqdmull_high_s32(b, c)) @@ -16764,7 +16334,7 @@ pub fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, N = 2))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { @@ -16775,7 +16345,7 @@ pub fn vqdmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { @@ -16786,55 +16356,55 @@ pub fn vqdmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlalh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqdmlalh_s16(a, b, vget_lane_s16::(c)) } #[doc = "Signed saturating doubling multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { static_assert_uimm_bits!(LANE, 3); - unsafe { vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqdmlalh_s16(a, b, vgetq_lane_s16::(c)) } #[doc = "Signed saturating doubling multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { static_assert_uimm_bits!(LANE, 1); - unsafe { vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) } + vqdmlals_s32(a, b, vget_lane_s32::(c)) } #[doc = "Signed saturating doubling multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) } + vqdmlals_s32(a, b, vgetq_lane_s32::(c)) } #[doc = "Signed saturating doubling multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 { let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); - unsafe { vqadds_s32(a, simd_extract!(x, 0)) } + vqadds_s32(a, vgetq_lane_s32::<0>(x)) } #[doc = "Signed saturating doubling multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)"] @@ -16850,7 +16420,7 @@ pub fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { @@ -16861,7 +16431,7 @@ pub fn vqdmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_laneq_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { @@ -16872,7 +16442,7 @@ pub fn vqdmlsl_high_laneq_s16(a: int32x4_t, b: int16x8_t, c: int16 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { @@ -16883,7 +16453,7 @@ pub fn vqdmlsl_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_laneq_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { @@ -16894,7 +16464,7 @@ pub fn vqdmlsl_high_laneq_s32(a: int64x2_t, b: int32x4_t, c: int32 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { vqsubq_s32(a, vqdmull_high_n_s16(b, c)) @@ -16903,7 +16473,7 @@ pub fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { vqsubq_s32(a, vqdmull_high_s16(b, c)) @@ -16912,7 +16482,7 @@ pub fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { vqsubq_s64(a, vqdmull_high_n_s32(b, c)) @@ -16921,7 +16491,7 @@ pub fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { vqsubq_s64(a, vqdmull_high_s32(b, c)) @@ -16930,7 +16500,7 @@ pub fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, N = 2))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { @@ -16941,7 +16511,7 @@ pub fn vqdmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { @@ -16952,55 +16522,55 @@ pub fn vqdmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqdmlslh_s16(a, b, vget_lane_s16::(c)) } #[doc = "Signed saturating doubling multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { static_assert_uimm_bits!(LANE, 3); - unsafe { vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqdmlslh_s16(a, b, vgetq_lane_s16::(c)) } #[doc = "Signed saturating doubling multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { static_assert_uimm_bits!(LANE, 1); - unsafe { vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) } + vqdmlsls_s32(a, b, vget_lane_s32::(c)) } #[doc = "Signed saturating doubling multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) } + vqdmlsls_s32(a, b, vgetq_lane_s32::(c)) } #[doc = "Signed saturating doubling multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 { let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); - unsafe { vqsubs_s32(a, simd_extract!(x, 0)) } + vqsubs_s32(a, vgetq_lane_s32::<0>(x)) } #[doc = "Signed saturating doubling multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)"] @@ -17021,7 +16591,7 @@ pub fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) } + vqdmulh_s16(a, vdup_n_s16(vget_lane_s16::(b))) } #[doc = "Vector saturating doubling multiply high by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16)"] @@ -17032,7 +16602,7 @@ pub fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) } + vqdmulhq_s16(a, vdupq_n_s16(vget_lane_s16::(b))) } #[doc = "Vector saturating doubling multiply high by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32)"] @@ -17043,7 +16613,7 @@ pub fn vqdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) } + vqdmulh_s32(a, vdup_n_s32(vget_lane_s32::(b))) } #[doc = "Vector saturating doubling multiply high by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32)"] @@ -17054,7 +16624,7 @@ pub fn vqdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) } + vqdmulhq_s32(a, vdupq_n_s32(vget_lane_s32::(b))) } #[doc = "Signed saturating doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)"] @@ -17065,10 +16635,8 @@ pub fn vqdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { static_assert_uimm_bits!(N, 2); - unsafe { - let b: i16 = simd_extract!(b, N as u32); - vqdmulhh_s16(a, b) - } + let b: i16 = vget_lane_s16::(b); + vqdmulhh_s16(a, b) } #[doc = "Signed saturating doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)"] @@ -17079,10 +16647,8 @@ pub fn vqdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { static_assert_uimm_bits!(N, 3); - unsafe { - let b: i16 = simd_extract!(b, N as u32); - vqdmulhh_s16(a, b) - } + let b: i16 = vgetq_lane_s16::(b); + vqdmulhh_s16(a, b) } #[doc = "Signed saturating doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)"] @@ -17093,7 +16659,7 @@ pub fn vqdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { pub fn vqdmulhh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - unsafe { simd_extract!(vqdmulh_s16(a, b), 0) } + vget_lane_s16::<0>(vqdmulh_s16(a, b)) } #[doc = "Signed saturating doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)"] @@ -17104,7 +16670,7 @@ pub fn vqdmulhh_s16(a: i16, b: i16) -> i16 { pub fn vqdmulhs_s32(a: i32, b: i32) -> i32 { let a: int32x2_t = vdup_n_s32(a); let b: int32x2_t = vdup_n_s32(b); - unsafe { simd_extract!(vqdmulh_s32(a, b), 0) } + vget_lane_s32::<0>(vqdmulh_s32(a, b)) } #[doc = "Signed saturating doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)"] @@ -17115,10 +16681,8 @@ pub fn vqdmulhs_s32(a: i32, b: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { static_assert_uimm_bits!(N, 1); - unsafe { - let b: i32 = simd_extract!(b, N as u32); - vqdmulhs_s32(a, b) - } + let b: i32 = vget_lane_s32::(b); + vqdmulhs_s32(a, b) } #[doc = "Signed saturating doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)"] @@ -17129,122 +16693,104 @@ pub fn vqdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { static_assert_uimm_bits!(N, 2); - unsafe { - let b: i32 = simd_extract!(b, N as u32); - vqdmulhs_s32(a, b) - } + let b: i32 = vgetq_lane_s32::(b); + vqdmulhs_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(N, 2); - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]); - vqdmull_s16(a, b) - } + let a = vget_high_s16(a); + let b = vdup_lane_s16::(b); + vqdmull_s16(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { static_assert_uimm_bits!(N, 2); - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]); - vqdmull_s32(a, b) - } + let a = vget_high_s32(a); + let b = vdup_laneq_s32::(b); + vqdmull_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2, N = 1))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(N, 1); - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]); - vqdmull_s32(a, b) - } + let a = vget_high_s32(a); + let b = vdup_lane_s32::(b); + vqdmull_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2, N = 4))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 4))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { static_assert_uimm_bits!(N, 3); - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]); - vqdmull_s16(a, b) - } + let a = vget_high_s16(a); + let b = vdup_laneq_s16::(b); + vqdmull_s16(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = vdup_n_s16(b); - vqdmull_s16(a, b) - } + let a = vget_high_s16(a); + let b = vdup_n_s16(b); + vqdmull_s16(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: int32x2_t = vdup_n_s32(b); - vqdmull_s32(a, b) - } + let a = vget_high_s32(a); + let b = vdup_n_s32(b); + vqdmull_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - vqdmull_s16(a, b) - } + let a = vget_high_s16(a); + let b = vget_high_s16(b); + vqdmull_s16(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmull2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); - vqdmull_s32(a, b) - } + let a = vget_high_s32(a); + let b = vget_high_s32(b); + vqdmull_s32(a, b) } #[doc = "Vector saturating doubling long multiply by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"] @@ -17255,10 +16801,8 @@ pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { static_assert_uimm_bits!(N, 3); - unsafe { - let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]); - vqdmull_s16(a, b) - } + let b = vdup_laneq_s16::(b); + vqdmull_s16(a, b) } #[doc = "Vector saturating doubling long multiply by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"] @@ -17269,10 +16813,8 @@ pub fn vqdmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { static_assert_uimm_bits!(N, 2); - unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]); - vqdmull_s32(a, b) - } + let b = vdup_laneq_s32::(b); + vqdmull_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"] @@ -17283,10 +16825,8 @@ pub fn vqdmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmullh_lane_s16(a: i16, b: int16x4_t) -> i32 { static_assert_uimm_bits!(N, 2); - unsafe { - let b: i16 = simd_extract!(b, N as u32); - vqdmullh_s16(a, b) - } + let b: i16 = vget_lane_s16::(b); + vqdmullh_s16(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)"] @@ -17297,10 +16837,8 @@ pub fn vqdmullh_lane_s16(a: i16, b: int16x4_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulls_laneq_s32(a: i32, b: int32x4_t) -> i64 { static_assert_uimm_bits!(N, 2); - unsafe { - let b: i32 = simd_extract!(b, N as u32); - vqdmulls_s32(a, b) - } + let b: i32 = vgetq_lane_s32::(b); + vqdmulls_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)"] @@ -17311,10 +16849,8 @@ pub fn vqdmulls_laneq_s32(a: i32, b: int32x4_t) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmullh_laneq_s16(a: i16, b: int16x8_t) -> i32 { static_assert_uimm_bits!(N, 3); - unsafe { - let b: i16 = simd_extract!(b, N as u32); - vqdmullh_s16(a, b) - } + let b: i16 = vgetq_lane_s16::(b); + vqdmullh_s16(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)"] @@ -17325,7 +16861,7 @@ pub fn vqdmullh_laneq_s16(a: i16, b: int16x8_t) -> i32 { pub fn vqdmullh_s16(a: i16, b: i16) -> i32 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - unsafe { simd_extract!(vqdmull_s16(a, b), 0) } + vgetq_lane_s32::<0>(vqdmull_s16(a, b)) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)"] @@ -17336,10 +16872,8 @@ pub fn vqdmullh_s16(a: i16, b: i16) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmulls_lane_s32(a: i32, b: int32x2_t) -> i64 { static_assert_uimm_bits!(N, 1); - unsafe { - let b: i32 = simd_extract!(b, N as u32); - vqdmulls_s32(a, b) - } + let b: i32 = vget_lane_s32::(b); + vqdmulls_s32(a, b) } #[doc = "Signed saturating doubling multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)"] @@ -17361,67 +16895,55 @@ pub fn vqdmulls_s32(a: i32, b: i32) -> i64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - unsafe { - simd_shuffle!( - a, - vqmovn_s16(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vqmovn_s16(b)) } #[doc = "Signed saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - unsafe { simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vqmovn_s32(b)) } #[doc = "Signed saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - unsafe { simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3]) } + vcombine_s32(a, vqmovn_s64(b)) } #[doc = "Signed saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqxtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - unsafe { - simd_shuffle!( - a, - vqmovn_u16(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqmovn_u16(b)) } #[doc = "Signed saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqxtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqmovn_u32(b)) } #[doc = "Signed saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqxtn2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqmovn_u64(b)) } #[doc = "Saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)"] @@ -17462,7 +16984,7 @@ pub fn vqmovnd_u64(a: u64) -> u32 { #[cfg_attr(test, assert_instr(sqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovnh_s16(a: i16) -> i8 { - unsafe { simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0) } + vget_lane_s8::<0>(vqmovn_s16(vdupq_n_s16(a))) } #[doc = "Saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)"] @@ -17471,7 +16993,7 @@ pub fn vqmovnh_s16(a: i16) -> i8 { #[cfg_attr(test, assert_instr(sqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovns_s32(a: i32) -> i16 { - unsafe { simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0) } + vget_lane_s16::<0>(vqmovn_s32(vdupq_n_s32(a))) } #[doc = "Saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)"] @@ -17480,7 +17002,7 @@ pub fn vqmovns_s32(a: i32) -> i16 { #[cfg_attr(test, assert_instr(uqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovnh_u16(a: u16) -> u8 { - unsafe { simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0) } + vget_lane_u8::<0>(vqmovn_u16(vdupq_n_u16(a))) } #[doc = "Saturating extract narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)"] @@ -17489,40 +17011,34 @@ pub fn vqmovnh_u16(a: u16) -> u8 { #[cfg_attr(test, assert_instr(uqxtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovns_u32(a: u32) -> u16 { - unsafe { simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0) } + vget_lane_u16::<0>(vqmovn_u32(vdupq_n_u32(a))) } #[doc = "Signed saturating extract unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtun2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { - unsafe { - simd_shuffle!( - a, - vqmovun_s16(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqmovun_s16(b)) } #[doc = "Signed saturating extract unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtun2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqmovun_s32(b)) } #[doc = "Signed saturating extract unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtun2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqmovun_s64(b)) } #[doc = "Signed saturating extract unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)"] @@ -17531,7 +17047,7 @@ pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { #[cfg_attr(test, assert_instr(sqxtun))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovunh_s16(a: i16) -> u8 { - unsafe { simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0) } + vget_lane_u8::<0>(vqmovun_s16(vdupq_n_s16(a))) } #[doc = "Signed saturating extract unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)"] @@ -17540,7 +17056,7 @@ pub fn vqmovunh_s16(a: i16) -> u8 { #[cfg_attr(test, assert_instr(sqxtun))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovuns_s32(a: i32) -> u16 { - unsafe { simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0) } + vget_lane_u16::<0>(vqmovun_s32(vdupq_n_s32(a))) } #[doc = "Signed saturating extract unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)"] @@ -17549,7 +17065,7 @@ pub fn vqmovuns_s32(a: i32) -> u16 { #[cfg_attr(test, assert_instr(sqxtun))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqmovund_s64(a: i64) -> u32 { - unsafe { simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0) } + vget_lane_u32::<0>(vqmovun_s64(vdupq_n_s64(a))) } #[doc = "Signed saturating negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64)"] @@ -17590,7 +17106,7 @@ pub fn vqnegq_s64(a: int64x2_t) -> int64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sqneg))] pub fn vqnegb_s8(a: i8) -> i8 { - unsafe { simd_extract!(vqneg_s8(vdup_n_s8(a)), 0) } + vget_lane_s8::<0>(vqneg_s8(vdup_n_s8(a))) } #[doc = "Signed saturating negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16)"] @@ -17599,7 +17115,7 @@ pub fn vqnegb_s8(a: i8) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sqneg))] pub fn vqnegh_s16(a: i16) -> i16 { - unsafe { simd_extract!(vqneg_s16(vdup_n_s16(a)), 0) } + vget_lane_s16::<0>(vqneg_s16(vdup_n_s16(a))) } #[doc = "Signed saturating negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32)"] @@ -17608,7 +17124,7 @@ pub fn vqnegh_s16(a: i16) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sqneg))] pub fn vqnegs_s32(a: i32) -> i32 { - unsafe { simd_extract!(vqneg_s32(vdup_n_s32(a)), 0) } + vget_lane_s32::<0>(vqneg_s32(vdup_n_s32(a))) } #[doc = "Signed saturating negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64)"] @@ -17617,7 +17133,7 @@ pub fn vqnegs_s32(a: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(sqneg))] pub fn vqnegd_s64(a: i64) -> i64 { - unsafe { simd_extract!(vqneg_s64(vdup_n_s64(a)), 0) } + vget_lane_s64::<0>(vqneg_s64(vdup_n_s64(a))) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)"] @@ -17628,10 +17144,8 @@ pub fn vqnegd_s64(a: i64) -> i64 { #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlah_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlah_s16(a, b, c) - } + let c = vdup_lane_s16::(c); + vqrdmlah_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"] @@ -17642,10 +17156,8 @@ pub fn vqrdmlah_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); - vqrdmlah_s32(a, b, c) - } + let c = vdup_lane_s32::(c); + vqrdmlah_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"] @@ -17656,10 +17168,8 @@ pub fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlah_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlah_s16(a, b, c) - } + let c = vdup_laneq_s16::(c); + vqrdmlah_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"] @@ -17670,10 +17180,8 @@ pub fn vqrdmlah_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); - vqrdmlah_s32(a, b, c) - } + let c = vdup_laneq_s32::(c); + vqrdmlah_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"] @@ -17684,10 +17192,8 @@ pub fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); - vqrdmlahq_s16(a, b, c) - } + let c = vdupq_lane_s16::(c); + vqrdmlahq_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"] @@ -17698,10 +17204,8 @@ pub fn vqrdmlahq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlahq_s32(a, b, c) - } + let c = vdupq_lane_s32::(c); + vqrdmlahq_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"] @@ -17712,10 +17216,8 @@ pub fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); - vqrdmlahq_s16(a, b, c) - } + let c = vdupq_laneq_s16::(c); + vqrdmlahq_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"] @@ -17726,10 +17228,8 @@ pub fn vqrdmlahq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlahq_s32(a, b, c) - } + let c = vdupq_laneq_s32::(c); + vqrdmlahq_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"] @@ -17804,7 +17304,7 @@ pub fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlahh_s16(a, b, vget_lane_s16::(c)) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)"] @@ -17815,7 +17315,7 @@ pub fn vqrdmlahh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { static_assert_uimm_bits!(LANE, 3); - unsafe { vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlahh_s16(a, b, vgetq_lane_s16::(c)) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)"] @@ -17826,7 +17326,7 @@ pub fn vqrdmlahh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - unsafe { vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlahs_s32(a, b, vget_lane_s32::(c)) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)"] @@ -17837,7 +17337,7 @@ pub fn vqrdmlahs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlahs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlahs_s32(a, b, vgetq_lane_s32::(c)) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)"] @@ -17849,7 +17349,7 @@ pub fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); let c: int16x4_t = vdup_n_s16(c); - unsafe { simd_extract!(vqrdmlah_s16(a, b, c), 0) } + vget_lane_s16::<0>(vqrdmlah_s16(a, b, c)) } #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)"] @@ -17861,7 +17361,7 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 { let a: int32x2_t = vdup_n_s32(a); let b: int32x2_t = vdup_n_s32(b); let c: int32x2_t = vdup_n_s32(c); - unsafe { simd_extract!(vqrdmlah_s32(a, b, c), 0) } + vget_lane_s32::<0>(vqrdmlah_s32(a, b, c)) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16)"] @@ -17872,10 +17372,8 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 { #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlsh_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlsh_s16(a, b, c) - } + let c = vdup_lane_s16::(c); + vqrdmlsh_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"] @@ -17886,10 +17384,8 @@ pub fn vqrdmlsh_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlsh_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); - vqrdmlsh_s32(a, b, c) - } + let c = vdup_lane_s32::(c); + vqrdmlsh_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"] @@ -17900,10 +17396,8 @@ pub fn vqrdmlsh_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlsh_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlsh_s16(a, b, c) - } + let c = vdup_laneq_s16::(c); + vqrdmlsh_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"] @@ -17914,10 +17408,8 @@ pub fn vqrdmlsh_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlsh_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); - vqrdmlsh_s32(a, b, c) - } + let c = vdup_laneq_s32::(c); + vqrdmlsh_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"] @@ -17928,10 +17420,8 @@ pub fn vqrdmlsh_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); - vqrdmlshq_s16(a, b, c) - } + let c = vdupq_lane_s16::(c); + vqrdmlshq_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"] @@ -17942,10 +17432,8 @@ pub fn vqrdmlshq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlshq_s32(a, b, c) - } + let c = vdupq_lane_s32::(c); + vqrdmlshq_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"] @@ -17956,10 +17444,8 @@ pub fn vqrdmlshq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); - vqrdmlshq_s16(a, b, c) - } + let c = vdupq_laneq_s16::(c); + vqrdmlshq_s16(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"] @@ -17970,10 +17456,8 @@ pub fn vqrdmlshq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); - vqrdmlshq_s32(a, b, c) - } + let c = vdupq_laneq_s32::(c); + vqrdmlshq_s32(a, b, c) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"] @@ -18048,7 +17532,7 @@ pub fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlshh_s16(a, b, vget_lane_s16::(c)) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16)"] @@ -18059,7 +17543,7 @@ pub fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { static_assert_uimm_bits!(LANE, 3); - unsafe { vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlshh_s16(a, b, vgetq_lane_s16::(c)) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32)"] @@ -18070,7 +17554,7 @@ pub fn vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - unsafe { vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlshs_s32(a, b, vget_lane_s32::(c)) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32)"] @@ -18081,7 +17565,7 @@ pub fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 #[stable(feature = "rdm_intrinsics", since = "1.62.0")] pub fn vqrdmlshs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) } + vqrdmlshs_s32(a, b, vgetq_lane_s32::(c)) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16)"] @@ -18093,7 +17577,7 @@ pub fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); let c: int16x4_t = vdup_n_s16(c); - unsafe { simd_extract!(vqrdmlsh_s16(a, b, c), 0) } + vget_lane_s16::<0>(vqrdmlsh_s16(a, b, c)) } #[doc = "Signed saturating rounding doubling multiply subtract returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32)"] @@ -18105,7 +17589,7 @@ pub fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 { let a: int32x2_t = vdup_n_s32(a); let b: int32x2_t = vdup_n_s32(b); let c: int32x2_t = vdup_n_s32(c); - unsafe { simd_extract!(vqrdmlsh_s32(a, b, c), 0) } + vget_lane_s32::<0>(vqrdmlsh_s32(a, b, c)) } #[doc = "Signed saturating rounding doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)"] @@ -18116,7 +17600,7 @@ pub fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) } + vqrdmulhh_s16(a, vget_lane_s16::(b)) } #[doc = "Signed saturating rounding doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)"] @@ -18127,7 +17611,7 @@ pub fn vqrdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { static_assert_uimm_bits!(LANE, 3); - unsafe { vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) } + vqrdmulhh_s16(a, vgetq_lane_s16::(b)) } #[doc = "Signed saturating rounding doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)"] @@ -18138,7 +17622,7 @@ pub fn vqrdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - unsafe { vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) } + vqrdmulhs_s32(a, vget_lane_s32::(b)) } #[doc = "Signed saturating rounding doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)"] @@ -18149,7 +17633,7 @@ pub fn vqrdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { static_assert_uimm_bits!(LANE, 2); - unsafe { vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) } + vqrdmulhs_s32(a, vgetq_lane_s32::(b)) } #[doc = "Signed saturating rounding doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)"] @@ -18158,7 +17642,7 @@ pub fn vqrdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { #[cfg_attr(test, assert_instr(sqrdmulh))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrdmulhh_s16(a: i16, b: i16) -> i16 { - unsafe { simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0) } + vget_lane_s16::<0>(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b))) } #[doc = "Signed saturating rounding doubling multiply returning high half"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)"] @@ -18167,7 +17651,7 @@ pub fn vqrdmulhh_s16(a: i16, b: i16) -> i16 { #[cfg_attr(test, assert_instr(sqrdmulh))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrdmulhs_s32(a: i32, b: i32) -> i32 { - unsafe { simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0) } + vget_lane_s32::<0>(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b))) } #[doc = "Signed saturating rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8)"] @@ -18178,7 +17662,7 @@ pub fn vqrdmulhs_s32(a: i32, b: i32) -> i32 { pub fn vqrshlb_s8(a: i8, b: i8) -> i8 { let a: int8x8_t = vdup_n_s8(a); let b: int8x8_t = vdup_n_s8(b); - unsafe { simd_extract!(vqrshl_s8(a, b), 0) } + vget_lane_s8::<0>(vqrshl_s8(a, b)) } #[doc = "Signed saturating rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16)"] @@ -18189,7 +17673,7 @@ pub fn vqrshlb_s8(a: i8, b: i8) -> i8 { pub fn vqrshlh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - unsafe { simd_extract!(vqrshl_s16(a, b), 0) } + vget_lane_s16::<0>(vqrshl_s16(a, b)) } #[doc = "Unsigned signed saturating rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8)"] @@ -18200,7 +17684,7 @@ pub fn vqrshlh_s16(a: i16, b: i16) -> i16 { pub fn vqrshlb_u8(a: u8, b: i8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); let b: int8x8_t = vdup_n_s8(b); - unsafe { simd_extract!(vqrshl_u8(a, b), 0) } + vget_lane_u8::<0>(vqrshl_u8(a, b)) } #[doc = "Unsigned signed saturating rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16)"] @@ -18211,7 +17695,7 @@ pub fn vqrshlb_u8(a: u8, b: i8) -> u8 { pub fn vqrshlh_u16(a: u16, b: i16) -> u16 { let a: uint16x4_t = vdup_n_u16(a); let b: int16x4_t = vdup_n_s16(b); - unsafe { simd_extract!(vqrshl_u16(a, b), 0) } + vget_lane_u16::<0>(vqrshl_u16(a, b)) } #[doc = "Signed saturating rounding shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64)"] @@ -18281,79 +17765,67 @@ pub fn vqrshld_u64(a: u64, b: i64) -> u64 { #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqrshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vqrshrn_n_s16::(b)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vqrshrn_n_s32::(b)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqrshrn_n_s64::(b), [0, 1, 2, 3]) } + vcombine_s32(a, vqrshrn_n_s64::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqrshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqrshrn_n_u16::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqrshrn_n_u32::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqrshrn_n_u64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqrshrn_n_u64::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"] @@ -18365,7 +17837,7 @@ pub fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x pub fn vqrshrnd_n_u64(a: u64) -> u32 { static_assert!(N >= 1 && N <= 32); let a: uint64x2_t = vdupq_n_u64(a); - unsafe { simd_extract!(vqrshrn_n_u64::(a), 0) } + vget_lane_u32::<0>(vqrshrn_n_u64::(a)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16)"] @@ -18377,7 +17849,7 @@ pub fn vqrshrnd_n_u64(a: u64) -> u32 { pub fn vqrshrnh_n_u16(a: u16) -> u8 { static_assert!(N >= 1 && N <= 8); let a: uint16x8_t = vdupq_n_u16(a); - unsafe { simd_extract!(vqrshrn_n_u16::(a), 0) } + vget_lane_u8::<0>(vqrshrn_n_u16::(a)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32)"] @@ -18389,7 +17861,7 @@ pub fn vqrshrnh_n_u16(a: u16) -> u8 { pub fn vqrshrns_n_u32(a: u32) -> u16 { static_assert!(N >= 1 && N <= 16); let a: uint32x4_t = vdupq_n_u32(a); - unsafe { simd_extract!(vqrshrn_n_u32::(a), 0) } + vget_lane_u16::<0>(vqrshrn_n_u32::(a)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16)"] @@ -18401,7 +17873,7 @@ pub fn vqrshrns_n_u32(a: u32) -> u16 { pub fn vqrshrnh_n_s16(a: i16) -> i8 { static_assert!(N >= 1 && N <= 8); let a: int16x8_t = vdupq_n_s16(a); - unsafe { simd_extract!(vqrshrn_n_s16::(a), 0) } + vget_lane_s8::<0>(vqrshrn_n_s16::(a)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32)"] @@ -18413,7 +17885,7 @@ pub fn vqrshrnh_n_s16(a: i16) -> i8 { pub fn vqrshrns_n_s32(a: i32) -> i16 { static_assert!(N >= 1 && N <= 16); let a: int32x4_t = vdupq_n_s32(a); - unsafe { simd_extract!(vqrshrn_n_s32::(a), 0) } + vget_lane_s16::<0>(vqrshrn_n_s32::(a)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64)"] @@ -18425,46 +17897,40 @@ pub fn vqrshrns_n_s32(a: i32) -> i16 { pub fn vqrshrnd_n_s64(a: i64) -> i32 { static_assert!(N >= 1 && N <= 32); let a: int64x2_t = vdupq_n_s64(a); - unsafe { simd_extract!(vqrshrn_n_s64::(a), 0) } + vget_lane_s32::<0>(vqrshrn_n_s64::(a)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqrshrun_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqrshrun_n_s16::(b)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqrshrun_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqrshrun_n_s32::(b)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqrshrun_n_s64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqrshrun_n_s64::(b)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"] @@ -18476,7 +17942,7 @@ pub fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x pub fn vqrshrund_n_s64(a: i64) -> u32 { static_assert!(N >= 1 && N <= 32); let a: int64x2_t = vdupq_n_s64(a); - unsafe { simd_extract!(vqrshrun_n_s64::(a), 0) } + vget_lane_u32::<0>(vqrshrun_n_s64::(a)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16)"] @@ -18488,7 +17954,7 @@ pub fn vqrshrund_n_s64(a: i64) -> u32 { pub fn vqrshrunh_n_s16(a: i16) -> u8 { static_assert!(N >= 1 && N <= 8); let a: int16x8_t = vdupq_n_s16(a); - unsafe { simd_extract!(vqrshrun_n_s16::(a), 0) } + vget_lane_u8::<0>(vqrshrun_n_s16::(a)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32)"] @@ -18500,7 +17966,7 @@ pub fn vqrshrunh_n_s16(a: i16) -> u8 { pub fn vqrshruns_n_s32(a: i32) -> u16 { static_assert!(N >= 1 && N <= 16); let a: int32x4_t = vdupq_n_s32(a); - unsafe { simd_extract!(vqrshrun_n_s32::(a), 0) } + vget_lane_u16::<0>(vqrshrun_n_s32::(a)) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8)"] @@ -18511,7 +17977,7 @@ pub fn vqrshruns_n_s32(a: i32) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlb_n_s8(a: i8) -> i8 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(vqshl_n_s8::(vdup_n_s8(a)), 0) } + vget_lane_s8::<0>(vqshl_n_s8::(vdup_n_s8(a))) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64)"] @@ -18522,7 +17988,7 @@ pub fn vqshlb_n_s8(a: i8) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshld_n_s64(a: i64) -> i64 { static_assert_uimm_bits!(N, 6); - unsafe { simd_extract!(vqshl_n_s64::(vdup_n_s64(a)), 0) } + vget_lane_s64::<0>(vqshl_n_s64::(vdup_n_s64(a))) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16)"] @@ -18533,7 +17999,7 @@ pub fn vqshld_n_s64(a: i64) -> i64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlh_n_s16(a: i16) -> i16 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(vqshl_n_s16::(vdup_n_s16(a)), 0) } + vget_lane_s16::<0>(vqshl_n_s16::(vdup_n_s16(a))) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32)"] @@ -18544,7 +18010,7 @@ pub fn vqshlh_n_s16(a: i16) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshls_n_s32(a: i32) -> i32 { static_assert_uimm_bits!(N, 5); - unsafe { simd_extract!(vqshl_n_s32::(vdup_n_s32(a)), 0) } + vget_lane_s32::<0>(vqshl_n_s32::(vdup_n_s32(a))) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8)"] @@ -18555,7 +18021,7 @@ pub fn vqshls_n_s32(a: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlb_n_u8(a: u8) -> u8 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(vqshl_n_u8::(vdup_n_u8(a)), 0) } + vget_lane_u8::<0>(vqshl_n_u8::(vdup_n_u8(a))) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)"] @@ -18566,7 +18032,7 @@ pub fn vqshlb_n_u8(a: u8) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshld_n_u64(a: u64) -> u64 { static_assert_uimm_bits!(N, 6); - unsafe { simd_extract!(vqshl_n_u64::(vdup_n_u64(a)), 0) } + vget_lane_u64::<0>(vqshl_n_u64::(vdup_n_u64(a))) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)"] @@ -18577,7 +18043,7 @@ pub fn vqshld_n_u64(a: u64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlh_n_u16(a: u16) -> u16 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(vqshl_n_u16::(vdup_n_u16(a)), 0) } + vget_lane_u16::<0>(vqshl_n_u16::(vdup_n_u16(a))) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)"] @@ -18588,7 +18054,7 @@ pub fn vqshlh_n_u16(a: u16) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshls_n_u32(a: u32) -> u32 { static_assert_uimm_bits!(N, 5); - unsafe { simd_extract!(vqshl_n_u32::(vdup_n_u32(a)), 0) } + vget_lane_u32::<0>(vqshl_n_u32::(vdup_n_u32(a))) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8)"] @@ -18598,7 +18064,7 @@ pub fn vqshls_n_u32(a: u32) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlb_s8(a: i8, b: i8) -> i8 { let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b)); - unsafe { simd_extract!(c, 0) } + vget_lane_s8::<0>(c) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16)"] @@ -18608,7 +18074,7 @@ pub fn vqshlb_s8(a: i8, b: i8) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlh_s16(a: i16, b: i16) -> i16 { let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b)); - unsafe { simd_extract!(c, 0) } + vget_lane_s16::<0>(c) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32)"] @@ -18618,7 +18084,7 @@ pub fn vqshlh_s16(a: i16, b: i16) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshls_s32(a: i32, b: i32) -> i32 { let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b)); - unsafe { simd_extract!(c, 0) } + vget_lane_s32::<0>(c) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8)"] @@ -18628,7 +18094,7 @@ pub fn vqshls_s32(a: i32, b: i32) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlb_u8(a: u8, b: i8) -> u8 { let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b)); - unsafe { simd_extract!(c, 0) } + vget_lane_u8::<0>(c) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16)"] @@ -18638,7 +18104,7 @@ pub fn vqshlb_u8(a: u8, b: i8) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlh_u16(a: u16, b: i16) -> u16 { let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b)); - unsafe { simd_extract!(c, 0) } + vget_lane_u16::<0>(c) } #[doc = "Unsigned saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32)"] @@ -18648,7 +18114,7 @@ pub fn vqshlh_u16(a: u16, b: i16) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshls_u32(a: u32, b: i32) -> u32 { let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b)); - unsafe { simd_extract!(c, 0) } + vget_lane_u32::<0>(c) } #[doc = "Signed saturating shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64)"] @@ -18691,7 +18157,7 @@ pub fn vqshld_u64(a: u64, b: i64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlub_n_s8(a: i8) -> u8 { static_assert_uimm_bits!(N, 3); - unsafe { simd_extract!(vqshlu_n_s8::(vdup_n_s8(a)), 0) } + vget_lane_u8::<0>(vqshlu_n_s8::(vdup_n_s8(a))) } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)"] @@ -18702,7 +18168,7 @@ pub fn vqshlub_n_s8(a: i8) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlud_n_s64(a: i64) -> u64 { static_assert_uimm_bits!(N, 6); - unsafe { simd_extract!(vqshlu_n_s64::(vdup_n_s64(a)), 0) } + vget_lane_u64::<0>(vqshlu_n_s64::(vdup_n_s64(a))) } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)"] @@ -18713,7 +18179,7 @@ pub fn vqshlud_n_s64(a: i64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshluh_n_s16(a: i16) -> u16 { static_assert_uimm_bits!(N, 4); - unsafe { simd_extract!(vqshlu_n_s16::(vdup_n_s16(a)), 0) } + vget_lane_u16::<0>(vqshlu_n_s16::(vdup_n_s16(a))) } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)"] @@ -18724,85 +18190,73 @@ pub fn vqshluh_n_s16(a: i16) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshlus_n_s32(a: i32) -> u32 { static_assert_uimm_bits!(N, 5); - unsafe { simd_extract!(vqshlu_n_s32::(vdup_n_s32(a)), 0) } + vget_lane_u32::<0>(vqshlu_n_s32::(vdup_n_s32(a))) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vqshrn_n_s16::(b)) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vqshrn_n_s32::(b)) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqshrn_n_s64::(b), [0, 1, 2, 3]) } + vcombine_s32(a, vqshrn_n_s64::(b)) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqshrn_n_u16::(b)) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqshrn_n_u32::(b)) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqshrn_n_u64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqshrn_n_u64::(b)) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"] @@ -18849,7 +18303,7 @@ pub fn vqshrnd_n_u64(a: u64) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrnh_n_s16(a: i16) -> i8 { static_assert!(N >= 1 && N <= 8); - unsafe { simd_extract!(vqshrn_n_s16::(vdupq_n_s16(a)), 0) } + vget_lane_s8::<0>(vqshrn_n_s16::(vdupq_n_s16(a))) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32)"] @@ -18860,7 +18314,7 @@ pub fn vqshrnh_n_s16(a: i16) -> i8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrns_n_s32(a: i32) -> i16 { static_assert!(N >= 1 && N <= 16); - unsafe { simd_extract!(vqshrn_n_s32::(vdupq_n_s32(a)), 0) } + vget_lane_s16::<0>(vqshrn_n_s32::(vdupq_n_s32(a))) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)"] @@ -18871,7 +18325,7 @@ pub fn vqshrns_n_s32(a: i32) -> i16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrnh_n_u16(a: u16) -> u8 { static_assert!(N >= 1 && N <= 8); - unsafe { simd_extract!(vqshrn_n_u16::(vdupq_n_u16(a)), 0) } + vget_lane_u8::<0>(vqshrn_n_u16::(vdupq_n_u16(a))) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)"] @@ -18882,46 +18336,40 @@ pub fn vqshrnh_n_u16(a: u16) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrns_n_u32(a: u32) -> u16 { static_assert!(N >= 1 && N <= 16); - unsafe { simd_extract!(vqshrn_n_u32::(vdupq_n_u32(a)), 0) } + vget_lane_u16::<0>(vqshrn_n_u32::(vdupq_n_u32(a))) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrun2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqshrun_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqshrun_n_s16::(b)) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrun2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqshrun_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqshrun_n_s32::(b)) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrun2, N = 2))] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqshrun_n_s64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqshrun_n_s64::(b)) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"] @@ -18932,7 +18380,7 @@ pub fn vqshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrund_n_s64(a: i64) -> u32 { static_assert!(N >= 1 && N <= 32); - unsafe { simd_extract!(vqshrun_n_s64::(vdupq_n_s64(a)), 0) } + vget_lane_u32::<0>(vqshrun_n_s64::(vdupq_n_s64(a))) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)"] @@ -18943,7 +18391,7 @@ pub fn vqshrund_n_s64(a: i64) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrunh_n_s16(a: i16) -> u8 { static_assert!(N >= 1 && N <= 8); - unsafe { simd_extract!(vqshrun_n_s16::(vdupq_n_s16(a)), 0) } + vget_lane_u8::<0>(vqshrun_n_s16::(vdupq_n_s16(a))) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)"] @@ -18954,7 +18402,7 @@ pub fn vqshrunh_n_s16(a: i16) -> u8 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshruns_n_s32(a: i32) -> u16 { static_assert!(N >= 1 && N <= 16); - unsafe { simd_extract!(vqshrun_n_s32::(vdupq_n_s32(a)), 0) } + vget_lane_u16::<0>(vqshrun_n_s32::(vdupq_n_s32(a))) } #[doc = "Saturating subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8)"] @@ -18965,7 +18413,7 @@ pub fn vqshruns_n_s32(a: i32) -> u16 { pub fn vqsubb_s8(a: i8, b: i8) -> i8 { let a: int8x8_t = vdup_n_s8(a); let b: int8x8_t = vdup_n_s8(b); - unsafe { simd_extract!(vqsub_s8(a, b), 0) } + vget_lane_s8::<0>(vqsub_s8(a, b)) } #[doc = "Saturating subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16)"] @@ -18976,7 +18424,7 @@ pub fn vqsubb_s8(a: i8, b: i8) -> i8 { pub fn vqsubh_s16(a: i16, b: i16) -> i16 { let a: int16x4_t = vdup_n_s16(a); let b: int16x4_t = vdup_n_s16(b); - unsafe { simd_extract!(vqsub_s16(a, b), 0) } + vget_lane_s16::<0>(vqsub_s16(a, b)) } #[doc = "Saturating subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8)"] @@ -18987,7 +18435,7 @@ pub fn vqsubh_s16(a: i16, b: i16) -> i16 { pub fn vqsubb_u8(a: u8, b: u8) -> u8 { let a: uint8x8_t = vdup_n_u8(a); let b: uint8x8_t = vdup_n_u8(b); - unsafe { simd_extract!(vqsub_u8(a, b), 0) } + vget_lane_u8::<0>(vqsub_u8(a, b)) } #[doc = "Saturating subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16)"] @@ -18998,7 +18446,7 @@ pub fn vqsubb_u8(a: u8, b: u8) -> u8 { pub fn vqsubh_u16(a: u16, b: u16) -> u16 { let a: uint16x4_t = vdup_n_u16(a); let b: uint16x4_t = vdup_n_u16(b); - unsafe { simd_extract!(vqsub_u16(a, b), 0) } + vget_lane_u16::<0>(vqsub_u16(a, b)) } #[doc = "Saturating subtract"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32)"] @@ -19099,6 +18547,7 @@ fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t { #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19106,8 +18555,25 @@ pub fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t { vqtbl1(a, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl1(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19115,8 +18581,30 @@ pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { vqtbl1q(a, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl1q(a, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19124,8 +18612,25 @@ pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbl1(transmute(a), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbl1(transmute(a), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19133,8 +18638,30 @@ pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { transmute(vqtbl1q(transmute(a), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbl1q(transmute(a), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19142,8 +18669,25 @@ pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbl1(transmute(a), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbl1(transmute(a), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19151,6 +18695,27 @@ pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbl1q(transmute(a), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbl1q(transmute(a), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"] #[inline] #[target_feature(enable = "neon")] @@ -19185,6 +18750,7 @@ fn vqtbl2q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19192,8 +18758,34 @@ pub fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t { vqtbl2(a.0, a.1, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t { + let mut a: int8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl2(a.0, a.1, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19201,8 +18793,39 @@ pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t { vqtbl2q(a.0, a.1, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t { + let mut a: int8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl2q(a.0, a.1, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19210,8 +18833,34 @@ pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19219,8 +18868,39 @@ pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t { unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t { + let mut a: uint8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19228,8 +18908,34 @@ pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19237,6 +18943,36 @@ pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t { + let mut a: poly8x16x2_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3)"] #[inline] #[target_feature(enable = "neon")] @@ -19271,6 +19007,7 @@ fn vqtbl3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19278,35 +19015,171 @@ pub fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t { vqtbl3(a.0, a.1, a.2, b) } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t { - vqtbl3q(a.0, a.1, a.2, b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +pub fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t { + let mut a: int8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl3(a.0, a.1, a.2, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t { + vqtbl3q(a.0, a.1, a.2, b) } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t { + let mut a: int8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl3q(a.0, a.1, a.2, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = + transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t { + unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t { + let mut a: uint8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"] +#[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19314,8 +19187,40 @@ pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = + transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19323,6 +19228,42 @@ pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t { + let mut a: poly8x16x3_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4)"] #[inline] #[target_feature(enable = "neon")] @@ -19369,6 +19310,7 @@ fn vqtbl4q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19376,8 +19318,44 @@ pub fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t { vqtbl4(a.0, a.1, a.2, a.3, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t { + let mut a: int8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbl4(a.0, a.1, a.2, a.3, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19385,8 +19363,49 @@ pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t { vqtbl4q(a.0, a.1, a.2, a.3, b) } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t { + let mut a: int8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbl4q(a.0, a.1, a.2, a.3, b); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19402,8 +19421,50 @@ pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t { } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t { + let mut a: uint8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19419,8 +19480,55 @@ pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t { } } #[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t { + let mut a: uint8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbl4q( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19436,30 +19544,118 @@ pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t { } } #[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t { +pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t { + let mut a: poly8x16x4_t = a; unsafe { - transmute(vqtbl4q( + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbl4( transmute(a.0), transmute(a.1), transmute(a.2), transmute(a.3), b, - )) + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -fn vqtbx1(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { +pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t { + unsafe { + transmute(vqtbl4q( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )) + } +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t { + let mut a: poly8x16x4_t = a; + unsafe { + a.0 = simd_shuffle!( + a.0, + a.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.1 = simd_shuffle!( + a.1, + a.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.2 = simd_shuffle!( + a.2, + a.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + a.3 = simd_shuffle!( + a.3, + a.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbl4q( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + b, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +fn vqtbx1(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.tbx1.v8i8" @@ -19487,6 +19683,7 @@ fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19494,8 +19691,26 @@ pub fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { vqtbx1(a, b, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx1(a, b, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19503,8 +19718,32 @@ pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { vqtbx1q(a, b, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx1q(a, b, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19512,8 +19751,26 @@ pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx1(transmute(a), transmute(b), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19521,8 +19778,32 @@ pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbx1q(transmute(a), transmute(b), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19530,8 +19811,26 @@ pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t { unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx1(transmute(a), transmute(b), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19539,6 +19838,29 @@ pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t { unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbx1q(transmute(a), transmute(b), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2)"] #[inline] #[target_feature(enable = "neon")] @@ -19573,6 +19895,7 @@ fn vqtbx2q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19580,8 +19903,35 @@ pub fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t { vqtbx2(a, b.0, b.1, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t { + let mut b: int8x16x2_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx2(a, b.0, b.1, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19589,8 +19939,41 @@ pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t { vqtbx2q(a, b.0, b.1, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t { + let mut b: int8x16x2_t = b; + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx2q(a, b.0, b.1, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19598,57 +19981,178 @@ pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t { unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) } +pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x16x2_t = b; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) } +pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t { + unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t { - unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) } +pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t { + let mut b: uint8x16x2_t = b; + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -fn vqtbx3(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.tbx3.v8i8" - )] - fn _vqtbx3(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) - -> int8x8_t; - } - unsafe { _vqtbx3(a, b, c, d, e) } +pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) } } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -fn vqtbx3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { +pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x16x2_t = b; + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t { + unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t { + let mut b: poly8x16x2_t = b; + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +fn vqtbx3(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.tbx3.v8i8" + )] + fn _vqtbx3(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) + -> int8x8_t; + } + unsafe { _vqtbx3(a, b, c, d, e) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +fn vqtbx3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.tbx3.v16i8" @@ -19666,6 +20170,7 @@ fn vqtbx3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19673,8 +20178,40 @@ pub fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t { vqtbx3(a, b.0, b.1, b.2, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t { + let mut b: int8x16x3_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx3(a, b.0, b.1, b.2, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19682,8 +20219,46 @@ pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t { vqtbx3q(a, b.0, b.1, b.2, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t { + let mut b: int8x16x3_t = b; + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx3q(a, b.0, b.1, b.2, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19699,8 +20274,46 @@ pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x16x3_t = b; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19716,8 +20329,52 @@ pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t { + let mut b: uint8x16x3_t = b; + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbx3q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19733,8 +20390,46 @@ pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x16x3_t = b; + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19750,6 +20445,49 @@ pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t { + let mut b: poly8x16x3_t = b; + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbx3q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4)"] #[inline] #[target_feature(enable = "neon")] @@ -19812,6 +20550,7 @@ fn vqtbx4q( #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19819,8 +20558,45 @@ pub fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t { vqtbx4(a, b.0, b.1, b.2, b.3, c) } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t { + let mut b: int8x16x4_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vqtbx4(a, b.0, b.1, b.2, b.3, c); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19828,26 +20604,113 @@ pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t { vqtbx4q(a, b.0, b.1, b.2, b.3, c) } #[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t { +pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t { + let mut b: int8x16x4_t = b; unsafe { - transmute(vqtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = vqtbx4q(a, b.0, b.1, b.2, b.3, c); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vqtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), c, )) } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t { + let mut b: uint8x16x4_t = b; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = transmute(vqtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19864,8 +20727,58 @@ pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t { + let mut b: uint8x16x4_t = b; + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = transmute(vqtbx4q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19882,8 +20795,52 @@ pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t { } } #[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t { + let mut b: poly8x16x4_t = b; + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = transmute(vqtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] @@ -19899,6 +20856,55 @@ pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t { )) } } +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t { + let mut b: poly8x16x4_t = b; + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!( + b.0, + b.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.1 = simd_shuffle!( + b.1, + b.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.2 = simd_shuffle!( + b.2, + b.2, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + b.3 = simd_shuffle!( + b.3, + b.3, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = transmute(vqtbx4q( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + )); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "Rotate and exclusive OR"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"] #[inline] @@ -19936,7 +20942,6 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t { #[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] @@ -19944,23 +20949,8 @@ pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t { unsafe { transmute(vrbit_s8(transmute(a))) } } #[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vrbit_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] @@ -19968,28 +20958,8 @@ pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t { unsafe { transmute(vrbitq_s8(transmute(a))) } } #[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(vrbitq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} -#[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] @@ -19997,48 +20967,14 @@ pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t { unsafe { transmute(vrbit_s8(transmute(a))) } } #[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vrbit_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Reverse bit order"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(rbit))] pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { unsafe { transmute(vrbitq_s8(transmute(a))) } } -#[doc = "Reverse bit order"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(rbit))] -pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(vrbitq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} #[doc = "Reciprocal estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"] #[inline] @@ -20253,7 +21189,6 @@ pub fn vrecpxh_f16(a: f16) -> f16 { #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] @@ -20262,7131 +21197,9408 @@ pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { +pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { +pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { +pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { +pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { - unsafe { - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { +pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { +pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { +pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { +pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { +pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { +pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { - unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { +pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { - unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { +pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t { +pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { +pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { - unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { +pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { - unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { +pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { - unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t { +pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { +pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { +pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { +pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { +pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { +pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { +pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { +pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { +pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { +pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { +pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { +pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { +pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { +pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { +pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { +pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { +pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { - unsafe { - let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { +pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { unsafe { transmute(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32x_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v2f32" + )] + fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t; } + unsafe { _vrnd32x_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v4f32" + )] + fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t; } + unsafe { _vrnd32xq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v2f64" + )] + fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd32xq_f64(a) } +} +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint32x.f64" + )] + fn _vrnd32x_f64(a: f64) -> f64; + } + unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32z_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v2f32" + )] + fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrnd32z_f32(a) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v4f32" + )] + fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrnd32zq_f32(a) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v2f64" + )] + fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd32zq_f64(a) } +} +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint32z.f64" + )] + fn _vrnd32z_f64(a: f64) -> f64; + } + unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64x_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v2f32" + )] + fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrnd64x_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v4f32" + )] + fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrnd64xq_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v2f64" + )] + fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd64xq_f64(a) } +} +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint64x.f64" + )] + fn _vrnd64x_f64(a: f64) -> f64; + } + unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64z_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v2f32" + )] + fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrnd64z_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v4f32" + )] + fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrnd64zq_f32(a) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v2f64" + )] + fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrnd64zq_f64(a) } +} +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"] +#[inline] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint64z.f64" + )] + fn _vrnd64z_f64(a: f64) -> f64; + } + unsafe { transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrnd_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrnd_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrnd_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_trunc(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrnda_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrnda_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndaq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrnda_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_round(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinta))] +pub fn vrndah_f16(a: f16) -> f16 { + roundf16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintz))] +pub fn vrndh_f16(a: f16) -> f16 { + truncf16(a) +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndi_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f16" + )] + fn _vrndi_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrndi_f16(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v8f16" + )] + fn _vrndiq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrndiq_f16(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndi_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v2f32" + )] + fn _vrndi_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrndi_f32(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndiq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f32" + )] + fn _vrndiq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrndiq_f32(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndi_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v1f64" + )] + fn _vrndi_f64(a: float64x1_t) -> float64x1_t; + } + unsafe { _vrndi_f64(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v2f64" + )] + fn _vrndiq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrndiq_f64(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frinti))] +pub fn vrndih_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.f16" + )] + fn _vrndih_f16(a: f16) -> f16; + } + unsafe { _vrndih_f16(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndm_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndm_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndm_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_floor(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintm))] +pub fn vrndmh_f16(a: f16) -> f16 { + floorf16(a) +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndn_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v1f64" + )] + fn _vrndn_f64(a: float64x1_t) -> float64x1_t; + } + unsafe { _vrndn_f64(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.v2f64" + )] + fn _vrndnq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrndnq_f64(a) } +} +#[doc = "Floating-point round to integral, toward minus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndnh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f16" + )] + fn _vrndnh_f16(a: f16) -> f16; + } + unsafe { _vrndnh_f16(a) } +} +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintn))] +pub fn vrndns_f32(a: f32) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f32" + )] + fn _vrndns_f32(a: f32) -> f32; + } + unsafe { _vrndns_f32(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndp_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndp_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndpq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndp_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_ceil(a) } +} +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintp))] +pub fn vrndph_f16(a: f16) -> f16 { + ceilf16(a) +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndx_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndx_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndx_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral exact, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_round_ties_even(a) } +} +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(frintx))] +pub fn vrndxh_f16(a: f16) -> f16 { + round_ties_even_f16(a) +} +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(srshl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshld_s64(a: i64, b: i64) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.i64" + )] + fn _vrshld_s64(a: i64, b: i64) -> i64; + } + unsafe { _vrshld_s64(a, b) } +} +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(urshl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshld_u64(a: u64, b: i64) -> u64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.i64" + )] + fn _vrshld_u64(a: u64, b: i64) -> u64; + } + unsafe { _vrshld_u64(a, b) } +} +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrd_n_s64(a: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + vrshld_s64(a, -N as i64) +} +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrd_n_u64(a: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + vrshld_u64(a, -N as i64) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_s8(a, vrshrn_n_s16::(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_s16(a, vrshrn_n_s32::(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_s32(a, vrshrn_n_s64::(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_u8(a, vrshrn_n_u16::(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_u16(a, vrshrn_n_u32::(b)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_u32(a, vrshrn_n_u64::(b)) +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrte_f64(a: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v1f64" + )] + fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t; + } + unsafe { _vrsqrte_f64(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v2f64" + )] + fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t; + } + unsafe { _vrsqrteq_f64(a) } +} +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrted_f64(a: f64) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f64" + )] + fn _vrsqrted_f64(a: f64) -> f64; + } + unsafe { _vrsqrted_f64(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrte))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vrsqrtes_f32(a: f32) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f32" + )] + fn _vrsqrtes_f32(a: f32) -> f32; } + unsafe { _vrsqrtes_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] +#[inline] +#[cfg_attr(test, assert_instr(frsqrte))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrteh_f16(a: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.f16" + )] + fn _vrsqrteh_f16(a: f16) -> f16; + } + unsafe { _vrsqrteh_f16(a) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v1f64" + )] + fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + unsafe { _vrsqrts_f64(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v2f64" + )] + fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + unsafe { _vrsqrtsq_f64(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtsd_f64(a: f64, b: f64) -> f64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f64" + )] + fn _vrsqrtsd_f64(a: f64, b: f64) -> f64; + } + unsafe { _vrsqrtsd_f64(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f32" + )] + fn _vrsqrtss_f32(a: f32, b: f32) -> f32; + } + unsafe { _vrsqrtss_f32(a, b) } +} +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(frsqrts))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f16" + )] + fn _vrsqrtsh_f16(a: f16, b: f16) -> f16; + } + unsafe { _vrsqrtsh_f16(a, b) } +} +#[doc = "Signed rounding shift right and accumulate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsrad_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + let b: i64 = vrshrd_n_s64::(b); + a.wrapping_add(b) +} +#[doc = "Unsigned rounding shift right and accumulate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsrad_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + let b: u64 = vrshrd_n_u64::(b); + a.wrapping_add(b) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + vcombine_s8(a, vrsubhn_s16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + vcombine_s16(a, vrsubhn_s32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + vcombine_s32(a, vrsubhn_s64(b, c)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + vcombine_u8(a, vrsubhn_u16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + vcombine_u16(a, vrsubhn_u32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] +#[cfg_attr(test, assert_instr(rsubhn2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + vcombine_u32(a, vrsubhn_u64(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + vcombine_s8(a, vrsubhn_s16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[inline] #[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + vcombine_s16(a, vrsubhn_s32(b, c)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + vcombine_s32(a, vrsubhn_s64(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + vcombine_u8(a, vrsubhn_u16(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + vcombine_u16(a, vrsubhn_u32(b, c)) +} +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] +#[cfg_attr(test, assert_instr(rsubhn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + vcombine_u32(a, vrsubhn_u64(b, c)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v4f16" + )] + fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t; } + unsafe { _vscale_f16(vn, vm) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { - unsafe { transmute(a) } +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v8f16" + )] + fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t; + } + unsafe { _vscaleq_f16(vn, vm) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v2f32" + )] + fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t; } + unsafe { _vscale_f32(vn, vm) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { - unsafe { transmute(a) } +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v4f32" + )] + fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t; + } + unsafe { _vscaleq_f32(vn, vm) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] +#[doc = "Multi-vector floating-point adjust exponent"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"] +#[inline] +#[unstable(feature = "stdarch_neon_fp8", issue = "none")] +#[target_feature(enable = "neon,fp8")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] +pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fp8.fscale.v2f64" + )] + fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t; + } + unsafe { _vscaleq_f64(vn, vm) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { +pub fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float64x2_t = transmute(a); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] +#[doc = "SHA512 hash update part 2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h2))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h2" + )] + fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512h2q_u64(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] +#[doc = "SHA512 hash update part 2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h2))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h2" + )] + fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly64x2_t = transmute(a); + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: uint64x2_t = _vsha512h2q_u64(a, b, c); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] +#[doc = "SHA512 hash update part 1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h" + )] + fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512hq_u64(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] +#[doc = "SHA512 hash update part 1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512h))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512h" + )] + fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: uint64x2_t = _vsha512hq_u64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] +#[doc = "SHA512 schedule update 0"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su0))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su0" + )] + fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512su0q_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] +#[doc = "SHA512 schedule update 0"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su0))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su0" + )] + fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = _vsha512su0q_u64(a, b); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] +#[doc = "SHA512 schedule update 1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { - unsafe { transmute(a) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su1))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su1" + )] + fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } + unsafe { _vsha512su1q_u64(a, b, c) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] +#[doc = "SHA512 schedule update 1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(sha512su1))] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha512su1" + )] + fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; + } unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]); + let ret_val: uint64x2_t = _vsha512su1q_u64(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vshld_s64(a: i64, b: i64) -> i64 { + unsafe { transmute(vshl_s64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ushl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vshld_u64(a: u64, b: i64) -> u64 { + unsafe { transmute(vshl_u64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { + static_assert!(N >= 0 && N <= 8); + let b = vget_high_s8(a); + vshll_n_s8::(b) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 16); + let b = vget_high_s16(a); + vshll_n_s16::(b) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 32); + let b = vget_high_s32(a); + vshll_n_s32::(b) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { + static_assert!(N >= 0 && N <= 8); + let b: uint8x8_t = vget_high_u8(a); + vshll_n_u8::(b) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 16); + let b: uint16x4_t = vget_high_u16(a); + vshll_n_u16::(b) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 32); + let b: uint32x2_t = vget_high_u32(a); + vshll_n_u32::(b) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_s8(a, vshrn_n_s16::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_s16(a, vshrn_n_s32::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_s32(a, vshrn_n_s64::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vcombine_u8(a, vshrn_n_u16::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vcombine_u16(a, vshrn_n_u32::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vcombine_u32(a, vshrn_n_u64::(b)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { - unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v8i8" + )] + fn _vsli_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t; } + unsafe { _vsli_n_s8(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v16i8" + )] + fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t; + } + unsafe { _vsliq_n_s8(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v4i16" + )] + fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t; } + unsafe { _vsli_n_s16(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v8i16" + )] + fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t; + } + unsafe { _vsliq_n_s16(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 0 && N <= 31); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v2i32" + )] + fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t; } + unsafe { _vsli_n_s32(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { - unsafe { transmute(a) } +pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 31); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v4i32" + )] + fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t; + } + unsafe { _vsliq_n_s32(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v1i64" + )] + fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t; } + unsafe { _vsli_n_s64(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t { - unsafe { transmute(a) } +pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vsli.v2i64" + )] + fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t; + } + unsafe { _vsliq_n_s64(a, b, N) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vsli_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vsliq_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32x_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32x.v2f32" - )] - fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrnd32x_f32(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32x.v4f32" - )] - fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrnd32xq_f32(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32x.v2f64" - )] - fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t; - } - unsafe { _vrnd32xq_f64(a) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsli_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint32x.f64" - )] - fn _vrnd32x_f64(a: f64) -> f64; - } - unsafe { transmute(_vrnd32x_f64(simd_extract!(a, 0))) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sli, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsliq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"] +#[doc = "Shift left and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32z_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32z.v2f32" - )] - fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrnd32z_f32(a) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] +pub fn vslid_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"] +#[doc = "Shift left and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32z.v4f32" - )] - fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrnd32zq_f32(a) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] +pub fn vslid_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vsli_n_u64::(transmute(a), transmute(b))) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"] +#[doc = "SM3PARTW1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint32z.v2f64" + link_name = "llvm.aarch64.crypto.sm3partw1" )] - fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t; + fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; } - unsafe { _vrnd32zq_f64(a) } + unsafe { _vsm3partw1q_u32(a, b, c) } } -#[doc = "Floating-point round to 32-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"] +#[doc = "SM3PARTW1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint32z.f64" + link_name = "llvm.aarch64.crypto.sm3partw1" )] - fn _vrnd32z_f64(a: f64) -> f64; + fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; } - unsafe { transmute(_vrnd32z_f64(simd_extract!(a, 0))) } -} -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"] -#[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64x_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64x.v2f32" - )] - fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t; + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3partw1q_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrnd64x_f32(a) } } -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"] +#[doc = "SM3PARTW2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw2))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64x.v4f32" + link_name = "llvm.aarch64.crypto.sm3partw2" )] - fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t; + fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; } - unsafe { _vrnd64xq_f32(a) } + unsafe { _vsm3partw2q_u32(a, b, c) } } -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"] +#[doc = "SM3PARTW2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw2))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64x.v2f64" + link_name = "llvm.aarch64.crypto.sm3partw2" )] - fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t; + fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3partw2q_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrnd64xq_f64(a) } } -#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"] +#[doc = "SM3SS1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3ss1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint64x.f64" + link_name = "llvm.aarch64.crypto.sm3ss1" )] - fn _vrnd64x_f64(a: f64) -> f64; + fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; } - unsafe { transmute(_vrnd64x_f64(simd_extract!(a, 0))) } + unsafe { _vsm3ss1q_u32(a, b, c) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"] +#[doc = "SM3SS1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64z_f32(a: float32x2_t) -> float32x2_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3ss1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64z.v2f32" + link_name = "llvm.aarch64.crypto.sm3ss1" )] - fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t; + fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3ss1q_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrnd64z_f32(a) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"] +#[doc = "SM3TT1A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64z.v4f32" + link_name = "llvm.aarch64.crypto.sm3tt1a" )] - fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t; + fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; } - unsafe { _vrnd64zq_f32(a) } + unsafe { _vsm3tt1aq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"] +#[doc = "SM3TT1A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frint64z.v2f64" + link_name = "llvm.aarch64.crypto.sm3tt1a" )] - fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t; + fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt1aq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrnd64zq_f64(a) } } -#[doc = "Floating-point round to 64-bit integer toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"] +#[doc = "SM3TT1B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.frint64z.f64" + link_name = "llvm.aarch64.crypto.sm3tt1b" )] - fn _vrnd64z_f64(a: f64) -> f64; + fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; } - unsafe { transmute(_vrnd64z_f64(simd_extract!(a, 0))) } -} -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrnd_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_trunc(a) } -} -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_trunc(a) } -} -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrnd_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_trunc(a) } -} -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_trunc(a) } -} -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrnd_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_trunc(a) } -} -#[doc = "Floating-point round to integral, toward zero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_trunc(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrnda_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrnda_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndaq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrnda_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_round(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinta))] -pub fn vrndah_f16(a: f16) -> f16 { - roundf16(a) -} -#[doc = "Floating-point round to integral, to nearest with ties to away"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintz))] -pub fn vrndh_f16(a: f16) -> f16 { - truncf16(a) + unsafe { _vsm3tt1bq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"] +#[doc = "SM3TT1B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndi_f16(a: float16x4_t) -> float16x4_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v4f16" + link_name = "llvm.aarch64.crypto.sm3tt1b" )] - fn _vrndi_f16(a: float16x4_t) -> float16x4_t; + fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt1bq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrndi_f16(a) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"] +#[doc = "SM3TT2A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v8f16" + link_name = "llvm.aarch64.crypto.sm3tt2a" )] - fn _vrndiq_f16(a: float16x8_t) -> float16x8_t; + fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; } - unsafe { _vrndiq_f16(a) } + unsafe { _vsm3tt2aq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] +#[doc = "SM3TT2A"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndi_f32(a: float32x2_t) -> float32x2_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v2f32" + link_name = "llvm.aarch64.crypto.sm3tt2a" )] - fn _vrndi_f32(a: float32x2_t) -> float32x2_t; + fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt2aq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrndi_f32(a) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"] +#[doc = "SM3TT2B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndiq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v4f32" + link_name = "llvm.aarch64.crypto.sm3tt2b" )] - fn _vrndiq_f32(a: float32x4_t) -> float32x4_t; + fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; } - unsafe { _vrndiq_f32(a) } + unsafe { _vsm3tt2bq_u32(a, b, c, IMM2 as i64) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"] +#[doc = "SM3TT2B"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndi_f64(a: float64x1_t) -> float64x1_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(IMM2, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v1f64" + link_name = "llvm.aarch64.crypto.sm3tt2b" )] - fn _vrndi_f64(a: float64x1_t) -> float64x1_t; + fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm3tt2bq_u32(a, b, c, IMM2 as i64); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrndi_f64(a) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"] +#[doc = "SM4 key"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4ekey))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.v2f64" + link_name = "llvm.aarch64.crypto.sm4ekey" )] - fn _vrndiq_f64(a: float64x2_t) -> float64x2_t; + fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } - unsafe { _vrndiq_f64(a) } + unsafe { _vsm4ekeyq_u32(a, b) } } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"] +#[doc = "SM4 key"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frinti))] -pub fn vrndih_f16(a: f16) -> f16 { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4ekey))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.nearbyint.f16" + link_name = "llvm.aarch64.crypto.sm4ekey" )] - fn _vrndih_f16(a: f16) -> f16; + fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm4ekeyq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vrndih_f16(a) } -} -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndm_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_floor(a) } -} -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_floor(a) } -} -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndm_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_floor(a) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"] +#[doc = "SM4 encode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_floor(a) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4e))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm4e" + )] + fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsm4eq_u32(a, b) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"] +#[doc = "SM4 encode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndm_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_floor(a) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4e))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm4e" + )] + fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsm4eq_u32(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_floor(a) } -} -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintm))] -pub fn vrndmh_f16(a: f16) -> f16 { - floorf16(a) +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usqadd.v8i8" + )] + fn _vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vsqadd_u8(a, b) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u8)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndn_f64(a: float64x1_t) -> float64x1_t { +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.v1f64" + link_name = "llvm.aarch64.neon.usqadd.v16i8" )] - fn _vrndn_f64(a: float64x1_t) -> float64x1_t; + fn _vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; } - unsafe { _vrndn_f64(a) } + unsafe { _vsqaddq_u8(a, b) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t { +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.v2f64" + link_name = "llvm.aarch64.neon.usqadd.v4i16" )] - fn _vrndnq_f64(a: float64x2_t) -> float64x2_t; + fn _vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } - unsafe { _vrndnq_f64(a) } + unsafe { _vsqadd_u16(a, b) } } -#[doc = "Floating-point round to integral, toward minus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u16)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndnh_f16(a: f16) -> f16 { +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.f16" + link_name = "llvm.aarch64.neon.usqadd.v8i16" )] - fn _vrndnh_f16(a: f16) -> f16; + fn _vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; } - unsafe { _vrndnh_f16(a) } + unsafe { _vsqaddq_u16(a, b) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintn))] -pub fn vrndns_f32(a: f32) -> f32 { +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.roundeven.f32" + link_name = "llvm.aarch64.neon.usqadd.v2i32" )] - fn _vrndns_f32(a: f32) -> f32; + fn _vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; } - unsafe { _vrndns_f32(a) } -} -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndp_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_ceil(a) } -} -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_ceil(a) } -} -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndp_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_ceil(a) } -} -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndpq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_ceil(a) } -} -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndp_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_ceil(a) } + unsafe { _vsqadd_u32(a, b) } } -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u32)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_ceil(a) } -} -#[doc = "Floating-point round to integral, toward plus infinity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintp))] -pub fn vrndph_f16(a: f16) -> f16 { - ceilf16(a) -} -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndx_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_round_ties_even(a) } -} -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_round_ties_even(a) } +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usqadd.v4i32" + )] + fn _vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vsqaddq_u32(a, b) } } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndx_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_round_ties_even(a) } +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usqadd.v1i64" + )] + fn _vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vsqadd_u64(a, b) } } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"] +#[doc = "Unsigned saturating Accumulate of Signed value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u64)"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_round_ties_even(a) } +#[cfg_attr(test, assert_instr(usqadd))] +pub fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usqadd.v2i64" + )] + fn _vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vsqaddq_u64(a, b) } } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"] +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndx_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_round_ties_even(a) } +pub fn vsqaddb_u8(a: u8, b: i8) -> u8 { + vget_lane_u8::<0>(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b))) } -#[doc = "Floating-point round to integral exact, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"] +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_round_ties_even(a) } -} -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(frintx))] -pub fn vrndxh_f16(a: f16) -> f16 { - round_ties_even_f16(a) +pub fn vsqaddh_u16(a: u16, b: i16) -> u16 { + vget_lane_u16::<0>(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b))) } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"] +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshl))] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshld_s64(a: i64, b: i64) -> i64 { +pub fn vsqaddd_u64(a: u64, b: i64) -> u64 { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.i64" + link_name = "llvm.aarch64.neon.usqadd.i64" )] - fn _vrshld_s64(a: i64, b: i64) -> i64; + fn _vsqaddd_u64(a: u64, b: i64) -> u64; } - unsafe { _vrshld_s64(a, b) } + unsafe { _vsqaddd_u64(a, b) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"] +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshl))] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshld_u64(a: u64, b: i64) -> u64 { +pub fn vsqadds_u32(a: u32, b: i32) -> u32 { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.i64" + link_name = "llvm.aarch64.neon.usqadd.i32" )] - fn _vrshld_u64(a: u64, b: i64) -> u64; + fn _vsqadds_u32(a: u32, b: i32) -> u32; } - unsafe { _vrshld_u64(a, b) } + unsafe { _vsqadds_u32(a, b) } +} +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f16)"] +#[inline] +#[cfg_attr(test, assert_instr(fsqrt))] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_fsqrt(a) } +} +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f16)"] +#[inline] +#[cfg_attr(test, assert_instr(fsqrt))] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsqrtq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshr, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrd_n_s64(a: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - vrshld_s64(a, -N as i64) +pub fn vsqrt_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshr, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrd_n_u64(a: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - vrshld_u64(a, -N as i64) +pub fn vsqrtq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vrshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } +pub fn vsqrt_f64(a: float64x1_t) -> float64x1_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"] +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { + unsafe { simd_fsqrt(a) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"] +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrth_f16)"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(fsqrt))] +pub fn vsqrth_f16(a: f16) -> f16 { + sqrtf16(a) +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vrshrn_n_s64::(b), [0, 1, 2, 3]) } +pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { super::shift_right_and_insert!(u8, 8, N, a, b) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { +pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vrshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + unsafe { super::shift_right_and_insert!(u8, 16, N, a, b) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { +pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + unsafe { super::shift_right_and_insert!(u16, 4, N, a, b) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vrshrn_n_u64::(b), [0, 1, 2, 3]) } +pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { super::shift_right_and_insert!(u16, 8, N, a, b) } } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrte_f64(a: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v1f64" - )] - fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t; - } - unsafe { _vrsqrte_f64(a) } +pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { super::shift_right_and_insert!(u32, 2, N, a, b) } } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v2f64" - )] - fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t; - } - unsafe { _vrsqrteq_f64(a) } +pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { super::shift_right_and_insert!(u32, 4, N, a, b) } } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrted_f64(a: f64) -> f64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.f64" - )] - fn _vrsqrted_f64(a: f64) -> f64; - } - unsafe { _vrsqrted_f64(a) } +pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { super::shift_right_and_insert!(u64, 1, N, a, b) } } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrte))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtes_f32(a: f32) -> f32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.f32" - )] - fn _vrsqrtes_f32(a: f32) -> f32; - } - unsafe { _vrsqrtes_f32(a) } +pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { super::shift_right_and_insert!(u64, 2, N, a, b) } } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] #[inline] -#[cfg_attr(test, assert_instr(frsqrte))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrteh_f16(a: f16) -> f16 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.f16" - )] - fn _vrsqrteh_f16(a: f16) -> f16; - } - unsafe { _vrsqrteh_f16(a) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v1f64" - )] - fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - } - unsafe { _vrsqrts_f64(a, b) } +pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v2f64" - )] - fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - unsafe { _vrsqrtsq_f64(a, b) } +pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtsd_f64(a: f64, b: f64) -> f64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.f64" - )] - fn _vrsqrtsd_f64(a: f64, b: f64) -> f64; - } - unsafe { _vrsqrtsd_f64(a, b) } +pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frsqrts))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.f32" - )] - fn _vrsqrtss_f32(a: f32, b: f32) -> f32; - } - unsafe { _vrsqrtss_f32(a, b) } +pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vsri_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(test, assert_instr(frsqrts))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.f16" - )] - fn _vrsqrtsh_f16(a: f16, b: f16) -> f16; - } - unsafe { _vrsqrtsh_f16(a, b) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vsriq_n_s32::(transmute(a), transmute(b))) } } -#[doc = "Signed rounding shift right and accumulate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsrad_n_s64(a: i64, b: i64) -> i64 { +pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { static_assert!(N >= 1 && N <= 64); - let b: i64 = vrshrd_n_s64::(b); - a.wrapping_add(b) + unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Unsigned rounding shift right and accumulate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshr, N = 2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsrad_n_u64(a: u64, b: u64) -> u64 { +pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(N >= 1 && N <= 64); - let b: u64 = vrshrd_n_u64::(b); - a.wrapping_add(b) + unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let x: int8x8_t = vrsubhn_s16(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let x: int16x4_t = vrsubhn_s32(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let x: int32x2_t = vrsubhn_s64(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - let x: uint8x8_t = vrsubhn_u16(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - let x: uint16x4_t = vrsubhn_u32(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vsri_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "little")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(sri, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - let x: uint32x2_t = vrsubhn_u64(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +pub fn vsriq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"] +#[doc = "Shift right and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let x: int8x8_t = vrsubhn_s16(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] +pub fn vsrid_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"] +#[doc = "Shift right and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let x: int16x4_t = vrsubhn_s32(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] +pub fn vsrid_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + unsafe { transmute(vsri_n_u64::(transmute(a), transmute(b))) } } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let x: int32x2_t = vrsubhn_s64(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - let x: uint8x8_t = vrsubhn_u16(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - let x: uint16x4_t = vrsubhn_u32(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_endian = "big")] -#[cfg_attr(test, assert_instr(rsubhn))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - let x: uint32x2_t = vrsubhn_u64(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v4f16" - )] - fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t; - } - unsafe { _vscale_f16(vn, vm) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v8f16" - )] - fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t; - } - unsafe { _vscaleq_f16(vn, vm) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v2f32" - )] - fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t; - } - unsafe { _vscale_f32(vn, vm) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v4f32" - )] - fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t; - } - unsafe { _vscaleq_f32(vn, vm) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Multi-vector floating-point adjust exponent"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[unstable(feature = "stdarch_neon_fp8", issue = "none")] -#[target_feature(enable = "neon,fp8")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))] -pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fp8.fscale.v2f64" - )] - fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t; - } - unsafe { _vscaleq_f64(vn, vm) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x1_t { - static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "SHA512 hash update part 2"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512h2))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512h2" - )] - fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - unsafe { _vsha512h2q_u64(a, b, c) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "SHA512 hash update part 1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512h))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512h" - )] - fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - unsafe { _vsha512hq_u64(a, b, c) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "SHA512 schedule update 0"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512su0))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512su0" - )] - fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - } - unsafe { _vsha512su0q_u64(a, b) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "SHA512 schedule update 1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512su1))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha512su1" - )] - fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - unsafe { _vsha512su1q_u64(a, b, c) } +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshl))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshld_s64(a: i64, b: i64) -> i64 { - unsafe { transmute(vshl_s64(transmute(a), transmute(b))) } +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushl))] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshld_u64(a: u64, b: i64) -> u64 { - unsafe { transmute(vshl_u64(transmute(a), transmute(b))) } +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { - static_assert!(N >= 0 && N <= 8); - unsafe { - let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vshll_n_s8::(b) - } +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 16); - unsafe { - let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vshll_n_s16::(b) - } +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 32); - unsafe { - let b: int32x2_t = simd_shuffle!(a, a, [2, 3]); - vshll_n_s32::(b) - } +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { - static_assert!(N >= 0 && N <= 8); - unsafe { - let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vshll_n_u8::(b) - } +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 16); - unsafe { - let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vshll_n_u16::(b) - } +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 32); - unsafe { - let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - vshll_n_u32::(b) - } +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vshrn_n_s64::(b), [0, 1, 2, 3]) } +pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } +pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { + crate::ptr::write_unaligned(ptr.cast(), a) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } +pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v1f64.p0" + )] + fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64); + } + _vst1_f64_x2(b.0, b.1, a) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vshrn_n_u64::(b), [0, 1, 2, 3]) } +pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2f64.p0" + )] + fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64); + } + _vst1q_f64_x2(b.0, b.1, a) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); +pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v8i8" + link_name = "llvm.aarch64.neon.st1x3.v1f64.p0" )] - fn _vsli_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t; + fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); } - unsafe { _vsli_n_s8(a, b, N) } + _vst1_f64_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); +pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v16i8" + link_name = "llvm.aarch64.neon.st1x3.v2f64.p0" )] - fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t; + fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64); } - unsafe { _vsliq_n_s8(a, b, N) } + _vst1q_f64_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); +pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v4i16" + link_name = "llvm.aarch64.neon.st1x4.v1f64.p0" )] - fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t; + fn _vst1_f64_x4( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + d: float64x1_t, + ptr: *mut f64, + ); } - unsafe { _vsli_n_s16(a, b, N) } + _vst1_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); +pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v8i16" + link_name = "llvm.aarch64.neon.st1x4.v2f64.p0" )] - fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t; + fn _vst1q_f64_x4( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + d: float64x2_t, + ptr: *mut f64, + ); } - unsafe { _vsliq_n_s16(a, b, N) } + _vst1q_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 0 && N <= 31); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v2i32" - )] - fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t; - } - unsafe { _vsli_n_s32(a, b, N) } +pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 31); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v4i32" - )] - fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t; - } - unsafe { _vsliq_n_s32(a, b, N) } +pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(stp))] +pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 0 && N <= 63); +pub unsafe fn vst2_lane_f64(a: *mut f64, b: float64x1x2_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v1i64" + link_name = "llvm.aarch64.neon.st2lane.v1f64.p0" )] - fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t; + fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8); } - unsafe { _vsli_n_s64(a, b, N) } + _vst2_lane_f64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 63); +pub unsafe fn vst2_lane_s64(a: *mut i64, b: int64x1x2_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.vsli.v2i64" + link_name = "llvm.aarch64.neon.st2lane.v1i64.p0" )] - fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t; + fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8); } - unsafe { _vsliq_n_s64(a, b, N) } -} -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } -} -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } + _vst2_lane_s64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } +pub unsafe fn vst2_lane_p64(a: *mut p64, b: poly64x1x2_t) { + static_assert!(LANE == 0); + vst2_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } +pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { + static_assert!(LANE == 0); + vst2_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vsli_n_s32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { + crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vsliq_n_s32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { + crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2f64.p0" + )] + fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v16i8.p0" + )] + fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsli_n_s8::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2i64.p0" + )] + fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vsliq_n_s8::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_p64(a: *mut p64, b: poly64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsli_n_s16::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + vst2q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vsliq_n_s16::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sli, N = 1))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsli_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + vst2q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sli, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsliq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsliq_n_s64::(transmute(a), transmute(b))) } +pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) { + vst2q_s64(transmute(a), transmute(b)) } -#[doc = "Shift left and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] -pub fn vslid_n_s64(a: i64, b: i64) -> i64 { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_s64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { + vst2q_s64(transmute(a), transmute(b)) } -#[doc = "Shift left and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] -pub fn vslid_n_u64(a: u64, b: u64) -> u64 { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vsli_n_u64::(transmute(a), transmute(b))) } -} -#[doc = "SM3PARTW1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] -#[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3partw1))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3partw1" - )] - fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsm3partw1q_u32(a, b, c) } +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "SM3PARTW2"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3partw2))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3partw2" + link_name = "llvm.aarch64.neon.st3lane.v1f64.p0" )] - fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8); } - unsafe { _vsm3partw2q_u32(a, b, c) } + _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "SM3SS1"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3ss1))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3ss1" + link_name = "llvm.aarch64.neon.st3lane.v1i64.p0" )] - fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; + fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8); } - unsafe { _vsm3ss1q_u32(a, b, c) } + _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "SM3TT1A"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt1a" - )] - fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; - } - unsafe { _vsm3tt1aq_u32(a, b, c, IMM2 as i64) } +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_p64(a: *mut p64, b: poly64x1x3_t) { + static_assert!(LANE == 0); + vst3_lane_s64::(transmute(a), transmute(b)) } -#[doc = "SM3TT1B"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt1b" - )] - fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; - } - unsafe { _vsm3tt1bq_u32(a, b, c, IMM2 as i64) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { + static_assert!(LANE == 0); + vst3_lane_s64::(transmute(a), transmute(b)) } -#[doc = "SM3TT2A"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt2a" - )] - fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; - } - unsafe { _vsm3tt2aq_u32(a, b, c, IMM2 as i64) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { + crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b) } -#[doc = "SM3TT2B"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(IMM2, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm3tt2b" - )] - fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t; - } - unsafe { _vsm3tt2bq_u32(a, b, c, IMM2 as i64) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { + crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b) } -#[doc = "SM4 key"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm4ekey))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm4ekey" + link_name = "llvm.aarch64.neon.st3lane.v2f64.p0" )] - fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8); } - unsafe { _vsm4ekeyq_u32(a, b) } + _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "SM4 encode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm4e))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sm4e" + link_name = "llvm.aarch64.neon.st3lane.v16i8.p0" )] - fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8); } - unsafe { _vsm4eq_u32(a, b) } + _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { +pub unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v8i8" + link_name = "llvm.aarch64.neon.st3lane.v2i64.p0" )] - fn _vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8); } - unsafe { _vsqadd_u8(a, b) } + _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_p64(a: *mut p64, b: poly64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3q_lane_s64::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + vst3q_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3q_lane_s64::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + vst3q_lane_s8::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) { + vst3q_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { + vst3q_s64(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_f64(a: *mut f64, b: float64x1x4_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v16i8" + link_name = "llvm.aarch64.neon.st4lane.v1f64.p0" )] - fn _vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + fn _vst4_lane_f64( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + d: float64x1_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsqaddq_u8(a, b) } + _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { +pub unsafe fn vst4_lane_s64(a: *mut i64, b: int64x1x4_t) { + static_assert!(LANE == 0); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v4i16" + link_name = "llvm.aarch64.neon.st4lane.v1i64.p0" )] - fn _vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + fn _vst4_lane_s64( + a: int64x1_t, + b: int64x1_t, + c: int64x1_t, + d: int64x1_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsqadd_u16(a, b) } + _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_p64(a: *mut p64, b: poly64x1x4_t) { + static_assert!(LANE == 0); + vst4_lane_s64::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { + static_assert!(LANE == 0); + vst4_lane_s64::(transmute(a), transmute(b)) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { + crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { + crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { +pub unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v8i16" + link_name = "llvm.aarch64.neon.st4lane.v2f64.p0" )] - fn _vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + fn _vst4q_lane_f64( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + d: float64x2_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsqaddq_u16(a, b) } + _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { +pub unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v2i32" + link_name = "llvm.aarch64.neon.st4lane.v16i8.p0" )] - fn _vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + fn _vst4q_lane_s8( + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: int8x16_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsqadd_u32(a, b) } + _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { +pub unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v4i32" + link_name = "llvm.aarch64.neon.st4lane.v2i64.p0" )] - fn _vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + fn _vst4q_lane_s64( + a: int64x2_t, + b: int64x2_t, + c: int64x2_t, + d: int64x2_t, + n: i64, + ptr: *mut i8, + ); } - unsafe { _vsqaddq_u32(a, b) } + _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v1i64" - )] - fn _vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vsqadd_u64(a, b) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_p64(a: *mut p64, b: poly64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Unsigned saturating Accumulate of Signed value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usqadd))] -pub fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.v2i64" - )] - fn _vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vsqaddq_u64(a, b) } +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + vst4q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqaddb_u8(a: u8, b: i8) -> u8 { - unsafe { simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0) } +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4q_lane_s64::(transmute(a), transmute(b)) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqaddh_u16(a: u16, b: i16) -> u16 { - unsafe { simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0) } +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + vst4q_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqaddd_u64(a: u64, b: i64) -> u64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.i64" - )] - fn _vsqaddd_u64(a: u64, b: i64) -> u64; - } - unsafe { _vsqaddd_u64(a, b) } +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) { + vst4q_s64(transmute(a), transmute(b)) } -#[doc = "Unsigned saturating accumulate of signed value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqadds_u32(a: u32, b: i32) -> u32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usqadd.i32" - )] - fn _vsqadds_u32(a: u32, b: i32) -> u32; - } - unsafe { _vsqadds_u32(a, b) } +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) { + vst4q_s64(transmute(a), transmute(b)) +} +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[inline] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_f64(ptr: *mut f64, val: float64x1_t) { + static_assert!(LANE == 0); + vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +} +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[inline] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_f64(ptr: *mut f64, val: float64x2_t) { + static_assert_uimm_bits!(LANE, 1); + vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +} +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[inline] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_u64(ptr: *mut u64, val: uint64x1_t) { + static_assert!(LANE == 0); + vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +} +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[inline] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_u64(ptr: *mut u64, val: uint64x2_t) { + static_assert_uimm_bits!(LANE, 1); + vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +} +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[inline] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_p64(ptr: *mut p64, val: poly64x1_t) { + static_assert!(LANE == 0); + vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +} +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[inline] +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_p64(ptr: *mut p64, val: poly64x2_t) { + static_assert_uimm_bits!(LANE, 1); + vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f16)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[cfg_attr(test, assert_instr(fsqrt))] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_fsqrt(a) } +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1_lane_s64(ptr: *mut i64, val: int64x1_t) { + static_assert!(LANE == 0); + let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; + let lane: i64 = vget_lane_s64::(val); + (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f16)"] +#[doc = "Store-Release a single-element structure from one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] #[inline] -#[cfg_attr(test, assert_instr(fsqrt))] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsqrtq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_fsqrt(a) } +#[target_feature(enable = "neon,rcpc3")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] +#[cfg(target_has_atomic = "64")] +pub unsafe fn vstl1q_lane_s64(ptr: *mut i64, val: int64x2_t) { + static_assert_uimm_bits!(LANE, 1); + let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; + let lane: i64 = vgetq_lane_s64::(val); + (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrt_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_fsqrt(a) } +#[cfg_attr(test, assert_instr(fsub))] +pub fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + unsafe { simd_sub(a, b) } } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrtq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_fsqrt(a) } +#[cfg_attr(test, assert_instr(fsub))] +pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrt_f64(a: float64x1_t) -> float64x1_t { - unsafe { simd_fsqrt(a) } +#[cfg_attr(test, assert_instr(sub))] +pub fn vsubd_s64(a: i64, b: i64) -> i64 { + a.wrapping_sub(b) } -#[doc = "Calculates the square root of each lane."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { - unsafe { simd_fsqrt(a) } +#[cfg_attr(test, assert_instr(sub))] +pub fn vsubd_u64(a: u64, b: u64) -> u64 { + a.wrapping_sub(b) } -#[doc = "Floating-point round to integral, using current rounding mode"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrth_f16)"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"] #[inline] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(fsqrt))] -pub fn vsqrth_f16(a: f16) -> f16 { - sqrtf16(a) +#[cfg_attr(test, assert_instr(fsub))] +pub fn vsubh_f16(a: f16, b: f16) -> f16 { + a - b } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { super::shift_right_and_insert!(u8, 8, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] +pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + unsafe { + let c: int16x8_t = simd_cast(vget_high_s8(a)); + let d: int16x8_t = simd_cast(vget_high_s8(b)); + simd_sub(c, d) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { super::shift_right_and_insert!(u8, 16, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] +pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + unsafe { + let c: int32x4_t = simd_cast(vget_high_s16(a)); + let d: int32x4_t = simd_cast(vget_high_s16(b)); + simd_sub(c, d) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { super::shift_right_and_insert!(u16, 4, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))] +pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + unsafe { + let c: int64x2_t = simd_cast(vget_high_s32(a)); + let d: int64x2_t = simd_cast(vget_high_s32(b)); + simd_sub(c, d) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { super::shift_right_and_insert!(u16, 8, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] +pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + unsafe { + let c: uint16x8_t = simd_cast(vget_high_u8(a)); + let d: uint16x8_t = simd_cast(vget_high_u8(b)); + simd_sub(c, d) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { super::shift_right_and_insert!(u32, 2, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] +pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + unsafe { + let c: uint32x4_t = simd_cast(vget_high_u16(a)); + let d: uint32x4_t = simd_cast(vget_high_u16(b)); + simd_sub(c, d) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { super::shift_right_and_insert!(u32, 4, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))] +pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + unsafe { + let c: uint64x2_t = simd_cast(vget_high_u32(a)); + let d: uint64x2_t = simd_cast(vget_high_u32(b)); + simd_sub(c, d) + } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { super::shift_right_and_insert!(u64, 1, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] +pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let c = vget_high_s8(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { super::shift_right_and_insert!(u64, 2, N, a, b) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] +pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let c = vget_high_s16(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))] +pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let c = vget_high_s32(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] +pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let c = vget_high_u8(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] +pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let c = vget_high_u16(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } +#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))] +pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let c = vget_high_u32(b); + unsafe { simd_sub(a, simd_cast(c)) } } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vqtbl1_s8(vcombine_s8(a, unsafe { crate::mem::zeroed() }), unsafe { + { + transmute(b) + } + }) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + vqtbl1_u8(vcombine_u8(a, unsafe { crate::mem::zeroed() }), b) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { transmute(vsri_n_s32::(transmute(a), transmute(b))) } +pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + vqtbl1_p8(vcombine_p8(a, unsafe { crate::mem::zeroed() }), b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { transmute(vsriq_n_s32::(transmute(a), transmute(b))) } +pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } +pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + vqtbl1_u8(vcombine_u8(a.0, a.1), b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } +pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + vqtbl1_p8(vcombine_p8(a.0, a.1), b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsri_n_s8::(transmute(a), transmute(b))) } +pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + let x = int8x16x2_t( + vcombine_s8(a.0, a.1), + vcombine_s8(a.2, unsafe { crate::mem::zeroed() }), + ); + vqtbl2_s8(x, vreinterpret_u8_s8(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vsriq_n_s8::(transmute(a), transmute(b))) } +pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t( + vcombine_u8(a.0, a.1), + vcombine_u8(a.2, unsafe { crate::mem::zeroed() }), + ); + vqtbl2_u8(x, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsri_n_s16::(transmute(a), transmute(b))) } +pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t( + vcombine_p8(a.0, a.1), + vcombine_p8(a.2, unsafe { crate::mem::zeroed() }), + ); + vqtbl2_p8(x, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vsriq_n_s16::(transmute(a), transmute(b))) } +pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)); + vqtbl2_s8(x, vreinterpret_u8_s8(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsri_n_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } +pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)); + vqtbl2_u8(x, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(sri, N = 1))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vsriq_n_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsriq_n_s64::(transmute(a), transmute(b))) } +pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)); + vqtbl2_p8(x, b) } -#[doc = "Shift right and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] -pub fn vsrid_n_s64(a: i64, b: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_s64::(transmute(a), transmute(b))) } +pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(8))), + vqtbx1_s8( + a, + vcombine_s8(b, crate::mem::zeroed()), + vreinterpret_u8_s8(c), + ), + a, + ) + } } -#[doc = "Shift right and insert"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))] -pub fn vsrid_n_u64(a: u64, b: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - unsafe { transmute(vsri_n_u64::(transmute(a), transmute(b))) } -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(8))), + vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(tbx))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(8))), + vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(16))), + vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(16))), + vqtbx1_u8(a, vcombine_u8(b.0, b.1), c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(16))), + vqtbx1_p8(a, vcombine_p8(b.0, b.1), c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + let x = int8x16x2_t( + vcombine_s8(b.0, b.1), + vcombine_s8(b.2, unsafe { crate::mem::zeroed() }), + ); + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(24))), + vqtbx2_s8(a, x, vreinterpret_u8_s8(c)), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t( + vcombine_u8(b.0, b.1), + vcombine_u8(b.2, unsafe { crate::mem::zeroed() }), + ); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(24))), + vqtbx2_u8(a, x, c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t( + vcombine_p8(b.0, b.1), + vcombine_p8(b.2, unsafe { crate::mem::zeroed() }), + ); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(24))), + vqtbx2_p8(a, x, c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3)); + unsafe { + simd_select( + simd_lt::(c, transmute(i8x8::splat(32))), + vqtbx2_s8(a, x, vreinterpret_u8_s8(c)), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3)); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(32))), + vqtbx2_u8(a, x, c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + let x = poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3)); + unsafe { + simd_select( + simd_lt::(c, transmute(u8x8::splat(32))), + vqtbx2_p8(a, x, c), + a, + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(str))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - crate::ptr::write_unaligned(ptr.cast(), a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v1f64.p0" - )] - fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - _vst1_f64_x2(b.0, b.1, a) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2f64.p0" - )] - fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64); - } - _vst1q_f64_x2(b.0, b.1, a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v1f64.p0" - )] - fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - _vst1_f64_x3(b.0, b.1, b.2, a) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2f64.p0" - )] - fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64); - } - _vst1q_f64_x3(b.0, b.1, b.2, a) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v1f64.p0" - )] - fn _vst1_f64_x4( - a: float64x1_t, - b: float64x1_t, - c: float64x1_t, - d: float64x1_t, - ptr: *mut f64, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - _vst1_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2f64.p0" - )] - fn _vst1q_f64_x4( - a: float64x2_t, - b: float64x2_t, - c: float64x2_t, - d: float64x2_t, - ptr: *mut f64, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } - _vst1q_f64_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(stp))] -pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { - core::ptr::write_unaligned(a.cast(), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_f64(a: *mut f64, b: float64x1x2_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v1f64.p0" - )] - fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8); - } - _vst2_lane_f64(b.0, b.1, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s64(a: *mut i64, b: int64x1x2_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v1i64.p0" - )] - fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - _vst2_lane_s64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_p64(a: *mut p64, b: poly64x1x2_t) { - static_assert!(LANE == 0); - vst2_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { - static_assert!(LANE == 0); - vst2_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { - crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { - crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2f64.p0" - )] - fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } - _vst2q_lane_f64(b.0, b.1, LANE as i64, a as _) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v16i8.p0" - )] - fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2i64.p0" - )] - fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_p64(a: *mut p64, b: poly64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - vst2q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - vst2q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) { - vst2q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { - vst2q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { - core::ptr::write_unaligned(a.cast(), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v1f64.p0" - )] - fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } - _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v1i64.p0" - )] - fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3_lane_p64(a: *mut p64, b: poly64x1x3_t) { - static_assert!(LANE == 0); - vst3_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { - static_assert!(LANE == 0); - vst3_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { - crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn1) +)] +pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { - crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2f64.p0" - )] - fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v16i8.p0" - )] - fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2i64.p0" - )] - fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_p64(a: *mut p64, b: poly64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - vst3q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - vst3q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) { - vst3q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { - vst3q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { - core::ptr::write_unaligned(a.cast(), b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_f64(a: *mut f64, b: float64x1x4_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v1f64.p0" - )] - fn _vst4_lane_f64( - a: float64x1_t, - b: float64x1_t, - c: float64x1_t, - d: float64x1_t, - n: i64, - ptr: *mut i8, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s64(a: *mut i64, b: int64x1x4_t) { - static_assert!(LANE == 0); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v1i64.p0" - )] - fn _vst4_lane_s64( - a: int64x1_t, - b: int64x1_t, - c: int64x1_t, - d: int64x1_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4_lane_p64(a: *mut p64, b: poly64x1x4_t) { - static_assert!(LANE == 0); - vst4_lane_s64::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { - static_assert!(LANE == 0); - vst4_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { - crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { - crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2f64.p0" - )] - fn _vst4q_lane_f64( - a: float64x2_t, - b: float64x2_t, - c: float64x2_t, - d: float64x2_t, - n: i64, - ptr: *mut i8, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v16i8.p0" - )] - fn _vst4q_lane_s8( - a: int8x16_t, - b: int8x16_t, - c: int8x16_t, - d: int8x16_t, - n: i64, - ptr: *mut i8, - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ) } - _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2i64.p0" - )] - fn _vst4q_lane_s64( - a: int64x2_t, - b: int64x2_t, - c: int64x2_t, - d: int64x2_t, - n: i64, - ptr: *mut i8, +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_p64(a: *mut p64, b: poly64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - vst4q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - vst4q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) { - vst4q_s64(transmute(a), transmute(b)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) { - vst4q_s64(transmute(a), transmute(b)) -} -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] -#[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_f64(ptr: *mut f64, val: float64x1_t) { - static_assert!(LANE == 0); - vstl1_lane_s64::(ptr as *mut i64, transmute(val)) -} -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_f64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] -#[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_f64(ptr: *mut f64, val: float64x2_t) { - static_assert_uimm_bits!(LANE, 1); - vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_u64(ptr: *mut u64, val: uint64x1_t) { - static_assert!(LANE == 0); - vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_u64(ptr: *mut u64, val: uint64x2_t) { - static_assert_uimm_bits!(LANE, 1); - vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_p64(ptr: *mut p64, val: poly64x1_t) { - static_assert!(LANE == 0); - vstl1_lane_s64::(ptr as *mut i64, transmute(val)) +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_p64(ptr: *mut p64, val: poly64x2_t) { - static_assert_uimm_bits!(LANE, 1); - vstl1q_lane_s64::(ptr as *mut i64, transmute(val)) +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1_lane_s64(ptr: *mut i64, val: int64x1_t) { - static_assert!(LANE == 0); - let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; - let lane: i64 = simd_extract!(val, LANE as u32); - (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Store-Release a single-element structure from one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"] #[inline] -#[target_feature(enable = "neon,rcpc3")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")] -#[cfg(target_has_atomic = "64")] -pub unsafe fn vstl1q_lane_s64(ptr: *mut i64, val: int64x2_t) { - static_assert_uimm_bits!(LANE, 1); - let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64; - let lane: i64 = simd_extract!(val, LANE as u32); - (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release) +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fsub))] -pub fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - unsafe { simd_sub(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(fsub))] -pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_sub(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sub))] -pub fn vsubd_s64(a: i64, b: i64) -> i64 { - a.wrapping_sub(b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(sub))] -pub fn vsubd_u64(a: u64, b: u64) -> u64 { - a.wrapping_sub(b) +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(fsub))] -pub fn vsubh_f16(a: f16, b: f16) -> f16 { - a - b +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ssubl2))] -pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { unsafe { - let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: int16x8_t = simd_cast(c); - let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: int16x8_t = simd_cast(e); - simd_sub(d, f) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ssubl2))] -pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { unsafe { - let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let d: int32x4_t = simd_cast(c); - let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let f: int32x4_t = simd_cast(e); - simd_sub(d, f) + simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ssubl2))] -pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { unsafe { - let c: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let d: int64x2_t = simd_cast(c); - let e: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let f: int64x2_t = simd_cast(e); - simd_sub(d, f) + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +} +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usubl2))] -pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { unsafe { - let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: uint16x8_t = simd_cast(c); - let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: uint16x8_t = simd_cast(e); - simd_sub(d, f) + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usubl2))] -pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - unsafe { - let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let d: uint32x4_t = simd_cast(c); - let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let f: uint32x4_t = simd_cast(e); - simd_sub(d, f) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"] +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usubl2))] -pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(trn2) +)] +pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { unsafe { - let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - let d: uint64x2_t = simd_cast(c); - let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let f: uint64x2_t = simd_cast(e); - simd_sub(d, f) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ssubw2))] -pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { +pub fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { unsafe { - let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - simd_sub(a, simd_cast(c)) + let c: int64x1_t = simd_and(a, b); + let d: i64x1 = i64x1::new(0); + simd_ne(c, transmute(d)) } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ssubw2))] -pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { +pub fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { unsafe { - let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - simd_sub(a, simd_cast(c)) + let c: int64x2_t = simd_and(a, b); + let d: i64x2 = i64x2::new(0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ssubw2))] -pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { +pub fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t { unsafe { - let c: int32x2_t = simd_shuffle!(b, b, [2, 3]); - simd_sub(a, simd_cast(c)) + let c: poly64x1_t = simd_and(a, b); + let d: i64x1 = i64x1::new(0); + simd_ne(c, transmute(d)) } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usubw2))] -pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { +pub fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { unsafe { - let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - simd_sub(a, simd_cast(c)) + let c: poly64x2_t = simd_and(a, b); + let d: i64x2 = i64x2::new(0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usubw2))] -pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { +pub fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { unsafe { - let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - simd_sub(a, simd_cast(c)) + let c: uint64x1_t = simd_and(a, b); + let d: u64x1 = u64x1::new(0); + simd_ne(c, transmute(d)) } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(usubw2))] -pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { +pub fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { - let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - simd_sub(a, simd_cast(c)) + let c: uint64x2_t = simd_and(a, b); + let d: u64x2 = u64x2::new(0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] +#[doc = "Compare bitwise test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] +#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vqtbl1_s8(vcombine_s8(a, unsafe { crate::mem::zeroed() }), unsafe { - { - transmute(b) - } - }) +pub fn vtstd_s64(a: i64, b: i64) -> u64 { + unsafe { transmute(vtst_s64(transmute(a), transmute(b))) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] +#[doc = "Compare bitwise test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] +#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - vqtbl1_u8(vcombine_u8(a, unsafe { crate::mem::zeroed() }), b) +pub fn vtstd_u64(a: u64, b: u64) -> u64 { + unsafe { transmute(vtst_u64(transmute(a), transmute(b))) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - vqtbl1_p8(vcombine_p8(a, unsafe { crate::mem::zeroed() }), b) +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v8i8" + )] + fn _vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t; + } + unsafe { _vuqadd_s8(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - unsafe { vqtbl1(transmute(vcombine_s8(a.0, a.1)), transmute(b)) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v16i8" + )] + fn _vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t; + } + unsafe { _vuqaddq_s8(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vqtbl1(transmute(vcombine_u8(a.0, a.1)), b)) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v4i16" + )] + fn _vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t; + } + unsafe { _vuqadd_s16(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vqtbl1(transmute(vcombine_p8(a.0, a.1)), b)) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v8i16" + )] + fn _vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t; + } + unsafe { _vuqaddq_s16(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - let x = int8x16x2_t( - vcombine_s8(a.0, a.1), - vcombine_s8(a.2, unsafe { crate::mem::zeroed() }), - ); - unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b))) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v2i32" + )] + fn _vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t; + } + unsafe { _vuqadd_s32(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t( - vcombine_u8(a.0, a.1), - vcombine_u8(a.2, unsafe { crate::mem::zeroed() }), - ); - unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v4i32" + )] + fn _vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t; + } + unsafe { _vuqaddq_s32(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t( - vcombine_p8(a.0, a.1), - vcombine_p8(a.2, unsafe { crate::mem::zeroed() }), - ); - unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v1i64" + )] + fn _vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t; + } + unsafe { _vuqadd_s64(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] +#[doc = "Signed saturating Accumulate of Unsigned value."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)); - unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b))) } +#[cfg_attr(test, assert_instr(suqadd))] +pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.v2i64" + )] + fn _vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t; + } + unsafe { _vuqaddq_s64(a, b) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)); - unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) } +pub fn vuqaddb_s8(a: i8, b: u8) -> i8 { + vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b))) } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbl))] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)); - unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) } +pub fn vuqaddh_s16(a: i16, b: u16) -> i16 { + vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b))) } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe { - simd_select( - simd_lt::(c, transmute(i8x8::splat(8))), - transmute(vqtbx1( - transmute(a), - transmute(vcombine_s8(b, crate::mem::zeroed())), - transmute(c), - )), - a, - ) +pub fn vuqaddd_s64(a: i64, b: u64) -> i64 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.i64" + )] + fn _vuqaddd_s64(a: i64, b: u64) -> i64; } + unsafe { _vuqaddd_s64(a, b) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] +#[doc = "Signed saturating accumulate of unsigned value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] +#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { +pub fn vuqadds_s32(a: i32, b: u32) -> i32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.suqadd.i32" + )] + fn _vuqadds_s32(a: i32, b: u32) -> i32; + } + unsafe { _vuqadds_s32(a, b) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(8))), - transmute(vqtbx1( - transmute(a), - transmute(vcombine_u8(b, crate::mem::zeroed())), - c, - )), - a, - ) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { unsafe { - simd_select( - simd_lt::(c, transmute(u8x8::splat(8))), - transmute(vqtbx1( - transmute(a), - transmute(vcombine_p8(b, crate::mem::zeroed())), - c, - )), - a, - ) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { - unsafe { vqtbx1(transmute(a), transmute(vcombine_s8(b.0, b.1)), transmute(c)) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vqtbx1(transmute(a), transmute(vcombine_u8(b.0, b.1)), c)) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vqtbx1(transmute(a), transmute(vcombine_p8(b.0, b.1)), c)) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - let x = int8x16x2_t( - vcombine_s8(b.0, b.1), - vcombine_s8(b.2, unsafe { crate::mem::zeroed() }), - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { unsafe { - transmute(simd_select( - simd_lt::(transmute(c), transmute(i8x8::splat(24))), - transmute(vqtbx2( - transmute(a), - transmute(x.0), - transmute(x.1), - transmute(c), - )), - a, - )) + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - let x = uint8x16x2_t( - vcombine_u8(b.0, b.1), - vcombine_u8(b.2, unsafe { crate::mem::zeroed() }), - ); - unsafe { - transmute(simd_select( - simd_lt::(transmute(c), transmute(u8x8::splat(24))), - transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)), - a, - )) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - let x = poly8x16x2_t( - vcombine_p8(b.0, b.1), - vcombine_p8(b.2, unsafe { crate::mem::zeroed() }), - ); +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe { - transmute(simd_select( - simd_lt::(transmute(c), transmute(u8x8::splat(24))), - transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)), - a, - )) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { - unsafe { - vqtbx2( - transmute(a), - transmute(vcombine_s8(b.0, b.1)), - transmute(vcombine_s8(b.2, b.3)), - transmute(c), - ) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe { - transmute(vqtbx2( - transmute(a), - transmute(vcombine_u8(b.0, b.1)), - transmute(vcombine_u8(b.2, b.3)), - c, - )) + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - unsafe { - transmute(vqtbx2( - transmute(a), - transmute(vcombine_p8(b.0, b.1)), - transmute(vcombine_p8(b.2, b.3)), - c, - )) - } -} -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } -} -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_shuffle!( a, b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] ) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_shuffle!( a, b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] ) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } -} -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] -pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -} -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } -} -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"] -#[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { unsafe { - simd_shuffle!( + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( a, b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] ) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp1) +)] +pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe { - simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe { - simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ) + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Transpose vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))] -pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe { - let c: int64x1_t = simd_and(a, b); - let d: i64x1 = i64x1::new(0); - simd_ne(c, transmute(d)) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { - let c: int64x2_t = simd_and(a, b); - let d: i64x2 = i64x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t { - unsafe { - let c: poly64x1_t = simd_and(a, b); - let d: i64x1 = i64x1::new(0); - simd_ne(c, transmute(d)) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { - let c: poly64x2_t = simd_and(a, b); - let d: i64x2 = i64x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { - let c: uint64x1_t = simd_and(a, b); - let d: u64x1 = u64x1::new(0); - simd_ne(c, transmute(d)) - } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmtst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { - let c: uint64x2_t = simd_and(a, b); - let d: u64x2 = u64x2::new(0, 0); - simd_ne(c, transmute(d)) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Compare bitwise test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstd_s64(a: i64, b: i64) -> u64 { - unsafe { transmute(vtst_s64(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Compare bitwise test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(tst))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vtstd_u64(a: u64, b: u64) -> u64 { - unsafe { transmute(vtst_u64(transmute(a), transmute(b))) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v8i8" - )] - fn _vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ) } - unsafe { _vuqadd_s8(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v16i8" - )] - fn _vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - unsafe { _vuqaddq_s8(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v4i16" - )] - fn _vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t; - } - unsafe { _vuqadd_s16(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v8i16" - )] - fn _vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vuqaddq_s16(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v2i32" - )] - fn _vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t; - } - unsafe { _vuqadd_s32(a, b) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v4i32" - )] - fn _vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vuqaddq_s32(a, b) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v1i64" - )] - fn _vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t; - } - unsafe { _vuqadd_s64(a, b) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } -#[doc = "Signed saturating Accumulate of Unsigned value."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(suqadd))] -pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.v2i64" - )] - fn _vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vuqaddq_s64(a, b) } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqaddb_s8(a: i8, b: u8) -> i8 { - unsafe { simd_extract!(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqaddh_s16(a: i16, b: u16) -> i16 { - unsafe { simd_extract!(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqaddd_s64(a: i64, b: u64) -> i64 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.i64" - )] - fn _vuqaddd_s64(a: i64, b: u64) -> i64; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ) } - unsafe { _vuqaddd_s64(a, b) } } -#[doc = "Signed saturating accumulate of unsigned value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vuqadds_s32(a: i32, b: u32) -> i32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.suqadd.i32" - )] - fn _vuqadds_s32(a: i32, b: u32) -> i32; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - unsafe { _vuqadds_s32(a, b) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ) + } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(uzp2) +)] +pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"] +#[doc = "Exclusive OR and rotate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[target_feature(enable = "neon,sha3")] +#[cfg_attr(test, assert_instr(xar, IMM6 = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +pub fn vxarq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(IMM6, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.xar" + )] + fn _vxarq_u64(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t; + } + unsafe { _vxarq_u64(a, b, IMM6 as i64) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] -pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { - simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_shuffle!( a, b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] ) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { - simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } -} -#[doc = "Exclusive OR and rotate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"] -#[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(xar, IMM6 = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub fn vxarq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(IMM6, 6); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.xar" - )] - fn _vxarq_u64(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t; +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vxarq_u64(a, b, IMM6 as i64) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { simd_shuffle!(a, b, [0, 2]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { unsafe { simd_shuffle!( a, @@ -27396,301 +30608,559 @@ pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 2]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip1) +)] +pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ) + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,fp16")] +#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { unsafe { - simd_shuffle!( - a, - b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + unsafe { + let a: float64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] -pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 2]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] #[inline] -#[target_feature(enable = "neon,fp16")] -#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { - simd_shuffle!( - a, - b, - [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] - ) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, b, [1, 3]) } +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_shuffle!( @@ -27701,65 +31171,229 @@ pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { unsafe { simd_shuffle!( @@ -27770,29 +31404,123 @@ pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } } #[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } +} +#[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { unsafe { simd_shuffle!(a, b, [1, 3]) } } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr( + all(test, not(target_env = "msvc"), target_endian = "little"), + assert_instr(zip2) +)] +pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index cab36b9b4baf7..c66702814cfb2 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -12,7 +12,6 @@ pub use self::generated::*; use crate::{ core_arch::{arm_shared::*, simd::*}, - hint::unreachable_unchecked, intrinsics::{simd::*, *}, mem::transmute, }; @@ -94,117 +93,6 @@ macro_rules! shift_right_and_insert { pub(crate) use shift_right_and_insert; -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_lane_s64(_a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N1 == 0); - static_assert!(N2 == 0); - b -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_lane_u64(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N1 == 0); - static_assert!(N2 == 0); - b -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_lane_p64(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { - static_assert!(N1 == 0); - static_assert!(N2 == 0); - b -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_lane_f64( - _a: float64x1_t, - b: float64x1_t, -) -> float64x1_t { - static_assert!(N1 == 0); - static_assert!(N2 == 0); - b -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_laneq_s64( - _a: int64x1_t, - b: int64x2_t, -) -> int64x1_t { - static_assert!(LANE1 == 0); - static_assert_uimm_bits!(LANE2, 1); - unsafe { transmute::(simd_extract!(b, LANE2 as u32)) } -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_laneq_u64( - _a: uint64x1_t, - b: uint64x2_t, -) -> uint64x1_t { - static_assert!(LANE1 == 0); - static_assert_uimm_bits!(LANE2, 1); - unsafe { transmute::(simd_extract!(b, LANE2 as u32)) } -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_laneq_p64( - _a: poly64x1_t, - b: poly64x2_t, -) -> poly64x1_t { - static_assert!(LANE1 == 0); - static_assert_uimm_bits!(LANE2, 1); - unsafe { transmute::(simd_extract!(b, LANE2 as u32)) } -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcopy_laneq_f64( - _a: float64x1_t, - b: float64x2_t, -) -> float64x1_t { - static_assert!(LANE1 == 0); - static_assert_uimm_bits!(LANE2, 1); - unsafe { transmute::(simd_extract!(b, LANE2 as u32)) } -} - /// Load multiple single-element structures to one, two, three, or four registers #[inline] #[target_feature(enable = "neon")] @@ -443,42 +331,6 @@ pub fn vmovq_n_f64(value: f64) -> float64x2_t { vdupq_n_f64(value) } -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vget_high_f64(a: float64x2_t) -> float64x1_t { - unsafe { float64x1_t([simd_extract!(a, 1)]) } -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ext))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { - unsafe { transmute(u64x1::new(simd_extract!(a, 1))) } -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vget_low_f64(a: float64x2_t) -> float64x1_t { - unsafe { float64x1_t([simd_extract!(a, 0)]) } -} - -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { - unsafe { transmute(u64x1::new(simd_extract!(a, 0))) } -} - /// Duplicate vector element to vector or scalar #[inline] #[target_feature(enable = "neon")] @@ -493,29 +345,6 @@ pub fn vget_lane_f64(v: float64x1_t) -> f64 { unsafe { simd_extract!(v, IMM5 as u32) } } -/// Duplicate vector element to vector or scalar -#[inline] -#[target_feature(enable = "neon")] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, IMM5 = 0) -)] -pub fn vgetq_lane_f64(v: float64x2_t) -> f64 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } -} - -/// Vector combine -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t { - unsafe { simd_shuffle!(low, high, [0, 1]) } -} - /// Shift left #[inline] #[target_feature(enable = "neon")] @@ -781,38 +610,6 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - fn test_vget_high_f64() { - let a = f64x2::new(1.0, 2.0); - let e = f64x1::new(2.0); - let r = f64x1::from(vget_high_f64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_p64() { - let a = u64x2::new(1, 2); - let e = u64x1::new(2); - let r = u64x1::from(vget_high_p64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_f64() { - let a = f64x2::new(1.0, 2.0); - let e = f64x1::new(1.0); - let r = f64x1::from(vget_low_f64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_p64() { - let a = u64x2::new(1, 2); - let e = u64x1::new(1); - let r = u64x1::from(vget_low_p64(a.into())); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] fn test_vget_lane_f64() { let v = f64x1::new(1.0); @@ -820,15 +617,6 @@ mod tests { assert_eq!(r, 1.0); } - #[simd_test(enable = "neon")] - fn test_vgetq_lane_f64() { - let v = f64x2::new(0.0, 1.0); - let r = vgetq_lane_f64::<1>(v.into()); - assert_eq!(r, 1.0); - let r = vgetq_lane_f64::<0>(v.into()); - assert_eq!(r, 0.0); - } - #[simd_test(enable = "neon")] fn test_vcopy_lane_s64() { let a = i64x1::new(1); @@ -865,42 +653,6 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - fn test_vcopy_laneq_s64() { - let a = i64x1::new(1); - let b = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let r = i64x1::from(vcopy_laneq_s64::<0, 1>(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vcopy_laneq_u64() { - let a = u64x1::new(1); - let b = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r = u64x1::from(vcopy_laneq_u64::<0, 1>(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vcopy_laneq_p64() { - let a = u64x1::new(1); - let b = u64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let r = u64x1::from(vcopy_laneq_p64::<0, 1>(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vcopy_laneq_f64() { - let a = f64x1::from_array([1.]); - let b = f64x2::from_array([0., 0.5]); - let e = f64x1::from_array([0.5]); - let r = f64x1::from(vcopy_laneq_f64::<0, 1>(a.into(), b.into())); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] fn test_vbsl_f64() { let a = u64x1::new(0x8000000000000000); @@ -1037,7 +789,7 @@ mod tests { macro_rules! wide_store_load_roundtrip_fp16 { ($( $name:ident $args:tt);* $(;)?) => { $( - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn $name() { @@ -1308,7 +1060,7 @@ mod tests { macro_rules! lane_wide_store_load_roundtrip_neon { ($( $name:ident $args:tt);* $(;)?) => { $( - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics #[simd_test(enable = "neon")] unsafe fn $name() { lane_wide_store_load_roundtrip! $args; diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 84c1a91adf79f..5284a3c44af49 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -1659,7 +1659,7 @@ pub fn vabsq_s32(a: int32x4_t) -> int32x4_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vabsh_f16(a: f16) -> f16 { - unsafe { simd_extract!(vabs_f16(vdup_n_f16(a)), 0) } + vget_lane_f16::<0>(vabs_f16(vdup_n_f16(a))) } #[doc = "Floating-point Add (vector)."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f16)"] @@ -2189,7 +2189,11 @@ pub fn vaddh_f16(a: f16, b: f16) -> f16 { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(addhn2) )] #[cfg_attr( @@ -2203,7 +2207,7 @@ pub fn vaddh_f16(a: f16, b: f16) -> f16 { pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { unsafe { let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t::splat(8))); - simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + vcombine_s8(r, x) } } #[doc = "Add returning High Narrow (high half)."] @@ -2213,7 +2217,11 @@ pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(addhn2) )] #[cfg_attr( @@ -2227,7 +2235,7 @@ pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { unsafe { let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t::splat(16))); - simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + vcombine_s16(r, x) } } #[doc = "Add returning High Narrow (high half)."] @@ -2237,7 +2245,11 @@ pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(addhn2) )] #[cfg_attr( @@ -2251,7 +2263,7 @@ pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { unsafe { let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t::splat(32))); - simd_shuffle!(r, x, [0, 1, 2, 3]) + vcombine_s32(r, x) } } #[doc = "Add returning High Narrow (high half)."] @@ -2261,7 +2273,11 @@ pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(addhn2) )] #[cfg_attr( @@ -2275,7 +2291,7 @@ pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t { unsafe { let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t::splat(8))); - simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + vcombine_u8(r, x) } } #[doc = "Add returning High Narrow (high half)."] @@ -2285,7 +2301,11 @@ pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(addhn2) )] #[cfg_attr( @@ -2299,7 +2319,7 @@ pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t { unsafe { let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t::splat(16))); - simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + vcombine_u16(r, x) } } #[doc = "Add returning High Narrow (high half)."] @@ -2309,7 +2329,11 @@ pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_ #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(addhn2) )] #[cfg_attr( @@ -2323,7 +2347,7 @@ pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_ pub fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t { unsafe { let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t::splat(32))); - simd_shuffle!(r, x, [0, 1, 2, 3]) + vcombine_u32(r, x) } } #[doc = "Add returning High Narrow."] @@ -2459,7 +2483,11 @@ pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(saddl2) )] #[cfg_attr( @@ -2471,9 +2499,9 @@ pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + let a: int16x4_t = vget_high_s16(a); + let b: int16x4_t = vget_high_s16(b); unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); let a: int32x4_t = simd_cast(a); let b: int32x4_t = simd_cast(b); simd_add(a, b) @@ -2486,7 +2514,11 @@ pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(saddl2) )] #[cfg_attr( @@ -2498,9 +2530,9 @@ pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + let a: int32x2_t = vget_high_s32(a); + let b: int32x2_t = vget_high_s32(b); unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); let a: int64x2_t = simd_cast(a); let b: int64x2_t = simd_cast(b); simd_add(a, b) @@ -2513,7 +2545,11 @@ pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(saddl2) )] #[cfg_attr( @@ -2525,9 +2561,9 @@ pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + let a: int8x8_t = vget_high_s8(a); + let b: int8x8_t = vget_high_s8(b); unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let a: int16x8_t = simd_cast(a); let b: int16x8_t = simd_cast(b); simd_add(a, b) @@ -2540,7 +2576,11 @@ pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(uaddl2) )] #[cfg_attr( @@ -2552,9 +2592,9 @@ pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + let a: uint16x4_t = vget_high_u16(a); + let b: uint16x4_t = vget_high_u16(b); unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); let a: uint32x4_t = simd_cast(a); let b: uint32x4_t = simd_cast(b); simd_add(a, b) @@ -2567,7 +2607,11 @@ pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(uaddl2) )] #[cfg_attr( @@ -2579,9 +2623,9 @@ pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + let a: uint32x2_t = vget_high_u32(a); + let b: uint32x2_t = vget_high_u32(b); unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); let a: uint64x2_t = simd_cast(a); let b: uint64x2_t = simd_cast(b); simd_add(a, b) @@ -2594,7 +2638,11 @@ pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(uaddl2) )] #[cfg_attr( @@ -2606,9 +2654,9 @@ pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + let a: uint8x8_t = vget_high_u8(a); + let b: uint8x8_t = vget_high_u8(b); unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let a: uint16x8_t = simd_cast(a); let b: uint16x8_t = simd_cast(b); simd_add(a, b) @@ -2792,7 +2840,11 @@ pub fn vaddq_p128(a: p128, b: p128) -> p128 { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(saddw2) )] #[cfg_attr( @@ -2804,8 +2856,8 @@ pub fn vaddq_p128(a: p128, b: p128) -> p128 { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let b = vget_high_s16(b); unsafe { - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); let b: int32x4_t = simd_cast(b); simd_add(a, b) } @@ -2817,7 +2869,11 @@ pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(saddw2) )] #[cfg_attr( @@ -2829,8 +2885,8 @@ pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let b = vget_high_s32(b); unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); let b: int64x2_t = simd_cast(b); simd_add(a, b) } @@ -2842,7 +2898,11 @@ pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(saddw2) )] #[cfg_attr( @@ -2854,8 +2914,8 @@ pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let b = vget_high_s8(b); unsafe { - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let b: int16x8_t = simd_cast(b); simd_add(a, b) } @@ -2867,7 +2927,11 @@ pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(uaddw2) )] #[cfg_attr( @@ -2879,8 +2943,8 @@ pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let b = vget_high_u16(b); unsafe { - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); let b: uint32x4_t = simd_cast(b); simd_add(a, b) } @@ -2892,7 +2956,11 @@ pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(uaddw2) )] #[cfg_attr( @@ -2904,8 +2972,8 @@ pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let b = vget_high_u32(b); unsafe { - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); let b: uint64x2_t = simd_cast(b); simd_add(a, b) } @@ -2917,7 +2985,11 @@ pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), assert_instr(uaddw2) )] #[cfg_attr( @@ -2929,8 +3001,8 @@ pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let b = vget_high_u8(b); unsafe { - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let b: uint16x8_t = simd_cast(b); simd_add(a, b) } @@ -3082,6 +3154,7 @@ pub fn vaddw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { #[doc = "AES single round encryption."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aesd))] @@ -3105,8 +3178,52 @@ pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { unsafe { _vaesdq_u8(data, key) } } #[doc = "AES single round encryption."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesd))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesd" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesd")] + fn _vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let key: uint8x16_t = simd_shuffle!( + key, + key, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaesdq_u8(data, key); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} +#[doc = "AES single round encryption."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aese))] @@ -3129,9 +3246,53 @@ pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { } unsafe { _vaeseq_u8(data, key) } } +#[doc = "AES single round encryption."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aese))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aese" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")] + fn _vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let key: uint8x16_t = simd_shuffle!( + key, + key, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaeseq_u8(data, key); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "AES inverse mix columns."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aesimc))] @@ -3154,9 +3315,48 @@ pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { } unsafe { _vaesimcq_u8(data) } } +#[doc = "AES inverse mix columns."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesimc))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesimc" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesimc")] + fn _vaesimcq_u8(data: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaesimcq_u8(data); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "AES mix columns."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(test, assert_instr(aesmc))] @@ -3179,6 +3379,44 @@ pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { } unsafe { _vaesmcq_u8(data) } } +#[doc = "AES mix columns."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(aesmc))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.aesmc" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesmc")] + fn _vaesmcq_u8(data: uint8x16_t) -> uint8x16_t; + } + unsafe { + let data: uint8x16_t = simd_shuffle!( + data, + data, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + let ret_val: uint8x16_t = _vaesmcq_u8(data); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } +} #[doc = "Vector bitwise and"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)"] #[inline] @@ -7270,7 +7508,6 @@ pub fn vclzq_s32(a: int32x4_t) -> int32x4_t { #[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] @@ -7290,35 +7527,8 @@ pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t { unsafe { transmute(vclz_s16(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vclz_s16(transmute(a))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] @@ -7338,35 +7548,8 @@ pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { unsafe { transmute(vclzq_s16(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(vclzq_s16(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] @@ -7386,35 +7569,8 @@ pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t { unsafe { transmute(vclz_s32(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x2_t = transmute(vclz_s32(transmute(a))); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] @@ -7434,35 +7590,8 @@ pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { unsafe { transmute(vclzq_s32(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(vclzq_s32(transmute(a))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] @@ -7482,35 +7611,8 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { unsafe { transmute(vclz_s8(transmute(a))) } } #[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vclz_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Count leading zero bits"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] @@ -7529,37 +7631,6 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t { pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { unsafe { transmute(vclzq_s8(transmute(a))) } } -#[doc = "Count leading zero bits"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(clz) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(vclzq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } -} #[doc = "Population count per byte."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_s8)"] #[inline] @@ -7605,7 +7676,6 @@ pub fn vcntq_s8(a: int8x16_t) -> int8x16_t { #[doc = "Population count per byte."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7625,9 +7695,8 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { unsafe { transmute(vcnt_s8(transmute(a))) } } #[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7643,17 +7712,12 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vcnt_s8(transmute(a))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { transmute(vcntq_s8(transmute(a))) } } #[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7669,13 +7733,12 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vcntq_s8(transmute(a))) } +pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { transmute(vcnt_s8(transmute(a))) } } #[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] @@ -7691,77 +7754,61 @@ pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(vcntq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { transmute(vcntq_s8(transmute(a))) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { transmute(vcnt_s8(transmute(a))) } +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vcnt_s8(transmute(a))); + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -7770,20 +7817,16 @@ pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { transmute(vcntq_s8(transmute(a))) } +pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } } -#[doc = "Population count per byte."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cnt) -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -7792,40 +7835,36 @@ pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { +pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(vcntq_s8(transmute(a))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7837,12 +7876,23 @@ pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } +pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7854,12 +7904,13 @@ pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } #[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7872,11 +7923,17 @@ pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7892,8 +7949,32 @@ pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } } #[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Join two smaller vectors into a single larger vector"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7909,8 +7990,9 @@ pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { unsafe { simd_shuffle!(a, b, [0, 1]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7922,12 +8004,16 @@ pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t { + unsafe { + let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7939,12 +8025,13 @@ pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7956,12 +8043,23 @@ pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } +pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7973,12 +8071,13 @@ pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { - unsafe { simd_shuffle!(a, b, [0, 1]) } +pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -7990,12 +8089,18 @@ pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8007,12 +8112,13 @@ pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) } } #[doc = "Join two smaller vectors into a single larger vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8024,71 +8130,60 @@ pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { - unsafe { simd_shuffle!(a, b, [0, 1]) } +pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vcreate_f16(a: u64) -> float16x4_t { - unsafe { transmute(a) } +pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vcreate_f16(a: u64) -> float16x4_t { +pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t { unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8097,20 +8192,16 @@ pub fn vcreate_f16(a: u64) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_f32(a: u64) -> float32x2_t { - unsafe { transmute(a) } +pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8119,23 +8210,26 @@ pub fn vcreate_f32(a: u64) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_f32(a: u64) -> float32x2_t { +pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t { unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8144,20 +8238,16 @@ pub fn vcreate_f32(a: u64) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s8(a: u64) -> int8x8_t { - unsafe { transmute(a) } +pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8166,23 +8256,21 @@ pub fn vcreate_s8(a: u64) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s8(a: u64) -> int8x8_t { +pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t { unsafe { - let ret_val: int8x8_t = transmute(a); + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8191,20 +8279,16 @@ pub fn vcreate_s8(a: u64) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s16(a: u64) -> int16x4_t { - unsafe { transmute(a) } +pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { + unsafe { simd_shuffle!(a, b, [0, 1]) } } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[doc = "Join two smaller vectors into a single larger vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -8213,16 +8297,15 @@ pub fn vcreate_s16(a: u64) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s16(a: u64) -> int16x4_t { +pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t { unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8230,21 +8313,22 @@ pub fn vcreate_s16(a: u64) -> int16x4_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop) )] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s32(a: u64) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vcreate_f16(a: u64) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8260,14 +8344,11 @@ pub fn vcreate_s32(a: u64) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s32(a: u64) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vcreate_f32(a: u64) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -8284,13 +8365,12 @@ pub fn vcreate_s32(a: u64) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_s64(a: u64) -> int64x1_t { +pub fn vcreate_s8(a: u64) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8306,13 +8386,12 @@ pub fn vcreate_s64(a: u64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u8(a: u64) -> uint8x8_t { +pub fn vcreate_s16(a: u64) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8328,16 +8407,12 @@ pub fn vcreate_u8(a: u64) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u8(a: u64) -> uint8x8_t { - unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vcreate_s32(a: u64) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8353,13 +8428,12 @@ pub fn vcreate_u8(a: u64) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u16(a: u64) -> uint16x4_t { +pub fn vcreate_s64(a: u64) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8375,16 +8449,12 @@ pub fn vcreate_u16(a: u64) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u16(a: u64) -> uint16x4_t { - unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vcreate_u8(a: u64) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8400,13 +8470,12 @@ pub fn vcreate_u16(a: u64) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vcreate_u32(a: u64) -> uint32x2_t { +pub fn vcreate_u16(a: u64) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8423,10 +8492,7 @@ pub fn vcreate_u32(a: u64) -> uint32x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vcreate_u32(a: u64) -> uint32x2_t { - unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } + unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"] @@ -8452,7 +8518,6 @@ pub fn vcreate_u64(a: u64) -> uint64x1_t { #[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8472,34 +8537,8 @@ pub fn vcreate_p8(a: u64) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vcreate_p8(a: u64) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -8519,31 +8558,6 @@ pub fn vcreate_p16(a: u64) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vcreate_p16(a: u64) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Insert vector element from another vector element"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"] #[inline] #[target_feature(enable = "neon,aes")] @@ -9573,35 +9587,6 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sdot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - unstable(feature = "stdarch_neon_dotprod", issue = "117224") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: int32x2_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] -#[inline] -#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9620,49 +9605,13 @@ pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> )] pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x2_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: int32x2_t = vdot_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sdot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - unstable(feature = "stdarch_neon_dotprod", issue = "117224") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: int32x2_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) - } + let c = vreinterpret_s32_s8(c); + let c = vdup_lane_s32::(c); + vdot_s32(a, b, vreinterpret_s8_s32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] #[inline] -#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9681,22 +9630,13 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: int8x16_t = - simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x2_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + let c = vreinterpret_s32_s8(c); + let c = vdupq_lane_s32::(c); + vdotq_s32(a, b, vreinterpretq_s8_s32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] #[inline] -#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9715,77 +9655,13 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: uint32x2_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(udot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - unstable(feature = "stdarch_neon_dotprod", issue = "117224") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint32x2_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(udot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - unstable(feature = "stdarch_neon_dotprod", issue = "117224") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - let c: uint32x2_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) - } + let c = vreinterpret_u32_u8(c); + let c = vdup_lane_u32::(c); + vdot_u32(a, b, vreinterpret_u8_u32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] #[inline] -#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9804,43 +9680,13 @@ pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_ )] pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint8x16_t = - simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint32x2_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + let c = vreinterpret_u32_u8(c); + let c = vdupq_lane_u32::(c); + vdotq_u32(a, b, vreinterpretq_u8_u32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sdot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x4_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"] -#[inline] -#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9852,21 +9698,13 @@ pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x16_t = - simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x4_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: int32x2_t = vdot_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } + let c = vreinterpretq_s32_s8(c); + let c = vdup_laneq_s32::(c); + vdot_s32(a, b, vreinterpret_s8_s32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9878,45 +9716,13 @@ pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x4_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sdot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: int8x16_t = - simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x16_t = - simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x4_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + let c = vreinterpretq_s32_s8(c); + let c = vdupq_laneq_s32::(c); + vdotq_s32(a, b, vreinterpretq_s8_s32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9928,42 +9734,13 @@ pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: uint32x4_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(udot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x16_t = - simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint32x4_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } + let c = vreinterpretq_u32_u8(c); + let c = vdup_laneq_u32::(c); + vdot_u32(a, b, vreinterpret_u8_u32(c)) } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9975,44 +9752,14 @@ pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_ #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdotq_laneq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: uint32x4_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) - } -} -#[doc = "Dot product arithmetic (indexed)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(udot, LANE = 0) -)] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub fn vdotq_laneq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint8x16_t = - simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x16_t = - simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint32x4_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + let c = vreinterpretq_u32_u8(c); + let c = vdupq_laneq_u32::(c); + vdotq_u32(a, b, vreinterpretq_u8_u32(c)) } #[doc = "Dot product arithmetic (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] @@ -10040,8 +9787,45 @@ pub fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { unsafe { _vdot_s32(a, b, c) } } #[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8" + )] + fn _vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t; + } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int32x2_t = _vdot_s32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Dot product arithmetic (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] @@ -10069,8 +9853,47 @@ pub fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { unsafe { _vdotq_s32(a, b, c) } } #[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8" + )] + fn _vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: int8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int32x4_t = _vdotq_s32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Dot product arithmetic (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] @@ -10098,8 +9921,45 @@ pub fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { unsafe { _vdot_u32(a, b, c) } } #[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.udot.v2i32.v8i8" + )] + fn _vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t; + } + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint32x2_t = _vdot_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Dot product arithmetic (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] @@ -10126,9 +9986,48 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { } unsafe { _vdotq_u32(a, b, c) } } +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.udot.v4i32.v16i8" + )] + fn _vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; + } + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: uint8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint32x4_t = _vdotq_u32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10152,8 +10051,9 @@ pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] @@ -10172,59 +10072,74 @@ pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { +pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 1) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 1) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10241,13 +10156,14 @@ pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { +pub fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(N, 1); unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10264,13 +10180,18 @@ pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { +pub fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10287,13 +10208,14 @@ pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { +pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] @@ -10310,19 +10232,24 @@ pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { +pub fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10333,19 +10260,20 @@ pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } +pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10356,19 +10284,24 @@ pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } +pub fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10379,19 +10312,20 @@ pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } +pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10402,19 +10336,24 @@ pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10425,19 +10364,20 @@ pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10448,19 +10388,24 @@ pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10471,19 +10416,20 @@ pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 1) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10494,19 +10440,24 @@ pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10517,19 +10468,20 @@ pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10540,19 +10492,24 @@ pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10563,19 +10520,20 @@ pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10586,19 +10544,24 @@ pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10609,19 +10572,20 @@ pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { - static_assert!(N == 0); - a +pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10632,66 +10596,73 @@ pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N == 0); - a +pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } +pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 4) + assert_instr(dup, N = 2) )] #[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2) @@ -10705,16 +10676,17 @@ pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { +pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2) @@ -10728,16 +10700,21 @@ pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { +pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2) @@ -10751,16 +10728,17 @@ pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { +pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2) @@ -10774,19 +10752,24 @@ pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { +pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10797,19 +10780,20 @@ pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 2) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10820,16 +10804,21 @@ pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } +pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) @@ -10843,16 +10832,17 @@ pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { +pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) @@ -10866,16 +10856,21 @@ pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { +pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) @@ -10889,16 +10884,17 @@ pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { +pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 4]) } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) @@ -10912,16 +10908,21 @@ pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { +pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) @@ -10935,16 +10936,17 @@ pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { +pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4) @@ -10958,19 +10960,28 @@ pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { +pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -10981,19 +10992,20 @@ pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11004,19 +11016,28 @@ pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11027,19 +11048,20 @@ pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 8]) } +pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11050,19 +11072,27 @@ pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(nop, N = 0) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11073,19 +11103,19 @@ pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 8) + assert_instr(nop, N = 0) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11096,98 +11126,134 @@ pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shuffle!(a, a, [N as u32; 16]) } +pub fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 1) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { - static_assert_uimm_bits!(N, 1); - unsafe { transmute::(simd_extract!(a, N as u32)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 1) + assert_instr(dup, N = 4) )] #[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 1); - unsafe { transmute::(simd_extract!(a, N as u32)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Create a new vector with all lanes set to a value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] #[cfg(not(target_arch = "arm64ec"))] -pub fn vdup_n_f16(a: f16) -> float16x4_t { - float16x4_t::splat(a) +pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Create a new vector with all lanes set to a value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] #[cfg(not(target_arch = "arm64ec"))] -pub fn vdupq_n_f16(a: f16) -> float16x8_t { - float16x8_t::splat(a) +pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11196,19 +11262,22 @@ pub fn vdupq_n_f16(a: f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_f32(value: f32) -> float32x2_t { - float32x2_t::splat(value) +pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11217,19 +11286,26 @@ pub fn vdup_n_f32(value: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_p16(value: p16) -> poly16x4_t { - poly16x4_t::splat(value) +pub fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11238,19 +11314,22 @@ pub fn vdup_n_p16(value: p16) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_p8(value: p8) -> poly8x8_t { - poly8x8_t::splat(value) +pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11259,19 +11338,26 @@ pub fn vdup_n_p8(value: p8) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s16(value: i16) -> int16x4_t { - int16x4_t::splat(value) +pub fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11280,19 +11366,22 @@ pub fn vdup_n_s16(value: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s32(value: i32) -> int32x2_t { - int32x2_t::splat(value) +pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11301,19 +11390,26 @@ pub fn vdup_n_s32(value: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s64(value: i64) -> int64x1_t { - int64x1_t::splat(value) +pub fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11322,19 +11418,22 @@ pub fn vdup_n_s64(value: i64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_s8(value: i8) -> int8x8_t { - int8x8_t::splat(value) +pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11343,19 +11442,26 @@ pub fn vdup_n_s8(value: i8) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u16(value: u16) -> uint16x4_t { - uint16x4_t::splat(value) +pub fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11364,19 +11470,22 @@ pub fn vdup_n_u16(value: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u32(value: u32) -> uint32x2_t { - uint32x2_t::splat(value) +pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11385,19 +11494,26 @@ pub fn vdup_n_u32(value: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u64(value: u64) -> uint64x1_t { - uint64x1_t::splat(value) +pub fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11406,19 +11522,22 @@ pub fn vdup_n_u64(value: u64) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdup_n_u8(value: u8) -> uint8x8_t { - uint8x8_t::splat(value) +pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11427,19 +11546,26 @@ pub fn vdup_n_u8(value: u8) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_f32(value: f32) -> float32x4_t { - float32x4_t::splat(value) +pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11448,19 +11574,22 @@ pub fn vdupq_n_f32(value: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_p16(value: p16) -> poly16x8_t { - poly16x8_t::splat(value) +pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11469,19 +11598,26 @@ pub fn vdupq_n_p16(value: p16) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_p8(value: p8) -> poly8x16_t { - poly8x16_t::splat(value) +pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11490,19 +11626,22 @@ pub fn vdupq_n_p8(value: p8) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s16(value: i16) -> int16x8_t { - int16x8_t::splat(value) +pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11511,19 +11650,26 @@ pub fn vdupq_n_s16(value: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s32(value: i32) -> int32x4_t { - int32x4_t::splat(value) +pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11532,19 +11678,22 @@ pub fn vdupq_n_s32(value: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s64(value: i64) -> int64x2_t { - int64x2_t::splat(value) +pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11553,19 +11702,26 @@ pub fn vdupq_n_s64(value: i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_s8(value: i8) -> int8x16_t { - int8x16_t::splat(value) +pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [N as u32; 4]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11574,19 +11730,22 @@ pub fn vdupq_n_s8(value: i8) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u16(value: u16) -> uint16x8_t { - uint16x8_t::splat(value) +pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11595,19 +11754,26 @@ pub fn vdupq_n_u16(value: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u32(value: u32) -> uint32x4_t { - uint32x4_t::splat(value) +pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11616,19 +11782,22 @@ pub fn vdupq_n_u32(value: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u64(value: u64) -> uint64x2_t { - uint64x2_t::splat(value) +pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11637,19 +11806,26 @@ pub fn vdupq_n_u64(value: u64) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_n_u8(value: u8) -> uint8x16_t { - uint8x16_t::splat(value) +pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11658,19 +11834,22 @@ pub fn vdupq_n_u8(value: u8) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { - float32x2_t::splat(value) +pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(dup, N = 4) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11679,18 +11858,24 @@ fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { - float32x4_t::splat(value) +pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 0) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11701,19 +11886,20 @@ fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { - static_assert!(N == 0); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 0) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11724,19 +11910,25 @@ pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { - static_assert!(N == 0); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 1) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11747,19 +11939,20 @@ pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup, N = 1) + assert_instr(dup, N = 8) )] #[rustc_legacy_const_generics(1)] #[cfg_attr( @@ -11770,20 +11963,27 @@ pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } +pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11792,19 +11992,22 @@ pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11813,19 +12016,27 @@ pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_xor(a, b) } +pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [N as u32; 8]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11834,19 +12045,22 @@ pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11855,19 +12069,31 @@ pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11876,19 +12102,22 @@ pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11897,19 +12126,31 @@ pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11918,19 +12159,22 @@ pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup, N = 8) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11939,19 +12183,30 @@ pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_xor(a, b) } +pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!(a, a, [N as u32; 16]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) + } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(nop, N = 1) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11960,19 +12215,21 @@ pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { + static_assert_uimm_bits!(N, 1); + unsafe { transmute(vgetq_lane_s64::(a)) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(nop, N = 1) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -11981,39 +12238,51 @@ pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_xor(a, b) } +pub fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 1); + unsafe { transmute(vgetq_lane_u64::(a)) } } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"] +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(dup) )] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdup_n_f16(a: f16) -> float16x4_t { + float16x4_t::splat(a) +} +#[doc = "Create a new vector with all lanes set to a value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) )] -pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_xor(a, b) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vdupq_n_f16(a: f16) -> float16x8_t { + float16x8_t::splat(a) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -12023,18 +12292,18 @@ pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_f32(value: f32) -> float32x2_t { + float32x2_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -12044,18 +12313,18 @@ pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_p16(value: p16) -> poly16x4_t { + poly16x4_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -12065,18 +12334,18 @@ pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_p8(value: p8) -> poly8x8_t { + poly8x8_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -12086,18 +12355,18 @@ pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_s16(value: i16) -> int16x4_t { + int16x4_t::splat(value) } -#[doc = "Vector bitwise exclusive or (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(eor) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -12107,52 +12376,40 @@ pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_xor(a, b) } +pub fn vdup_n_s32(value: i32) -> int32x2_t { + int32x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(fmov) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +pub fn vdup_n_s64(value: i64) -> int64x1_t { + int64x1_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12161,27 +12418,19 @@ pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } - } +pub fn vdup_n_s8(value: i8) -> int8x8_t { + int8x8_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12190,27 +12439,19 @@ pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } - } +pub fn vdup_n_u16(value: u16) -> uint16x4_t { + uint16x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12219,29 +12460,19 @@ pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } - } +pub fn vdup_n_u32(value: u32) -> uint32x2_t { + uint32x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(fmov) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12250,23 +12481,19 @@ pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { - static_assert!(N == 0); - a +pub fn vdup_n_u64(value: u64) -> uint64x1_t { + uint64x1_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, N = 0) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12275,21 +12502,19 @@ pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { - static_assert!(N == 0); - a +pub fn vdup_n_u8(value: u8) -> uint8x8_t { + uint8x8_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12298,33 +12523,19 @@ pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_f32(value: f32) -> float32x4_t { + float32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12333,33 +12544,19 @@ pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_p16(value: p16) -> poly16x8_t { + poly16x8_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12368,33 +12565,19 @@ pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_p8(value: p8) -> poly8x16_t { + poly8x16_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12403,33 +12586,19 @@ pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_s16(value: i16) -> int16x8_t { + int16x8_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12438,33 +12607,19 @@ pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_s32(value: i32) -> int32x4_t { + int32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12473,69 +12628,40 @@ pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_s64(value: i64) -> int64x2_t { + int64x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 7) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_s8(value: i8) -> int8x16_t { + int8x16_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12544,29 +12670,19 @@ pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_u16(value: u16) -> uint16x8_t { + uint16x8_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12575,29 +12691,19 @@ pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_u32(value: u32) -> uint32x4_t { + uint32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12606,29 +12712,19 @@ pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_u64(value: u64) -> uint64x2_t { + uint64x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12637,29 +12733,19 @@ pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_n_u8(value: u8) -> uint8x16_t { + uint8x16_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12668,29 +12754,19 @@ pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { + float32x2_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 3) + assert_instr(dup) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12699,29 +12775,21 @@ pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } - } +fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { + float32x4_t::splat(value) } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12730,27 +12798,22 @@ pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { + static_assert!(N == 0); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 1) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12759,27 +12822,25 @@ pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 1); +pub fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { + static_assert!(N == 0); unsafe { - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } + let ret_val: int64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 15) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12788,101 +12849,22 @@ pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { - match N & 0b1111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!( - a, - b, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - ), - 2 => simd_shuffle!( - a, - b, - [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] - ), - 3 => simd_shuffle!( - a, - b, - [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] - ), - 4 => simd_shuffle!( - a, - b, - [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - ), - 5 => simd_shuffle!( - a, - b, - [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - ), - 6 => simd_shuffle!( - a, - b, - [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - ), - 7 => simd_shuffle!( - a, - b, - [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - ), - 8 => simd_shuffle!( - a, - b, - [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] - ), - 9 => simd_shuffle!( - a, - b, - [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - ), - 10 => simd_shuffle!( - a, - b, - [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] - ), - 11 => simd_shuffle!( - a, - b, - [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] - ), - 12 => simd_shuffle!( - a, - b, - [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] - ), - 13 => simd_shuffle!( - a, - b, - [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] - ), - 14 => simd_shuffle!( - a, - b, - [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] - ), - 15 => simd_shuffle!( - a, - b, - [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] - ), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { + static_assert!(N == 0); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 15) + assert_instr(dup, N = 0) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12891,101 +12873,25 @@ pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 4); +pub fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { + static_assert!(N == 0); unsafe { - match N & 0b1111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!( - a, - b, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - ), - 2 => simd_shuffle!( - a, - b, - [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] - ), - 3 => simd_shuffle!( - a, - b, - [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] - ), - 4 => simd_shuffle!( - a, - b, - [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - ), - 5 => simd_shuffle!( - a, - b, - [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - ), - 6 => simd_shuffle!( - a, - b, - [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - ), - 7 => simd_shuffle!( - a, - b, - [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - ), - 8 => simd_shuffle!( - a, - b, - [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] - ), - 9 => simd_shuffle!( - a, - b, - [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - ), - 10 => simd_shuffle!( - a, - b, - [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] - ), - 11 => simd_shuffle!( - a, - b, - [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] - ), - 12 => simd_shuffle!( - a, - b, - [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] - ), - 13 => simd_shuffle!( - a, - b, - [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] - ), - 14 => simd_shuffle!( - a, - b, - [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] - ), - 15 => simd_shuffle!( - a, - b, - [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] - ), - _ => unreachable_unchecked(), - } + let ret_val: uint64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Extract vector from pair of vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext, N = 15) + assert_instr(dup, N = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -12994,144 +12900,74 @@ pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 4); - unsafe { - match N & 0b1111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!( - a, - b, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - ), - 2 => simd_shuffle!( - a, - b, - [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] - ), - 3 => simd_shuffle!( - a, - b, - [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] - ), - 4 => simd_shuffle!( - a, - b, - [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - ), - 5 => simd_shuffle!( - a, - b, - [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - ), - 6 => simd_shuffle!( - a, - b, - [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - ), - 7 => simd_shuffle!( - a, - b, - [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - ), - 8 => simd_shuffle!( - a, - b, - [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] - ), - 9 => simd_shuffle!( - a, - b, - [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - ), - 10 => simd_shuffle!( - a, - b, - [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] - ), - 11 => simd_shuffle!( - a, - b, - [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] - ), - 12 => simd_shuffle!( - a, - b, - [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] - ), - 13 => simd_shuffle!( - a, - b, - [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] - ), - 14 => simd_shuffle!( - a, - b, - [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] - ), - 15 => simd_shuffle!( - a, - b, - [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] - ), - _ => unreachable_unchecked(), - } - } +pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(dup, N = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { - unsafe { simd_fma(b, c, a) } +pub fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(dup, N = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { - unsafe { simd_fma(b, c, a) } +pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(dup, N = 1) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13140,18 +12976,23 @@ pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { simd_fma(b, c, a) } +pub fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13161,18 +13002,18 @@ pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { simd_fma(b, c, a) } +pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13182,18 +13023,18 @@ pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfma_f32(a, b, vdup_n_f32_vfp4(c)) +pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmla) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13203,70 +13044,60 @@ pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) +pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { - unsafe { - let b: float16x4_t = simd_neg(b); - vfma_f16(a, b, c) - } +pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { - unsafe { - let b: float16x8_t = simd_neg(b); - vfmaq_f16(a, b, c) - } +pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13276,21 +13107,18 @@ pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { - let b: float32x2_t = simd_neg(b); - vfma_f32(a, b, c) - } +pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13300,21 +13128,18 @@ pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { - let b: float32x4_t = simd_neg(b); - vfmaq_f32(a, b, c) - } +pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13324,18 +13149,18 @@ pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfms_f32(a, b, vdup_n_f32_vfp4(c)) +pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmls) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13345,56 +13170,39 @@ pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) +pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) )] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } -} -#[doc = "Duplicate vector element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(nop))] -pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13404,18 +13212,18 @@ pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { - unsafe { simd_shuffle!(a, a, [2, 3]) } +pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13425,18 +13233,18 @@ pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13446,18 +13254,18 @@ pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13467,18 +13275,18 @@ pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13488,18 +13296,18 @@ pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { - unsafe { simd_shuffle!(a, a, [2, 3]) } +pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(eor) )] #[cfg_attr( not(target_arch = "arm"), @@ -13509,61 +13317,77 @@ pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_xor(a, b) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 3) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 3) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, a, [2, 3]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vext_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13572,19 +13396,22 @@ pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13593,19 +13420,27 @@ pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { - unsafe { int64x1_t([simd_extract!(a, 1)]) } +pub fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ext) + assert_instr(ext, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13614,52 +13449,22 @@ pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { - unsafe { uint64x1_t([simd_extract!(a, 1)]) } -} -#[doc = "Duplicate vector element to scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vget_lane_f16(a: float16x4_t) -> f16 { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_extract!(a, LANE as u32) } +pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(ext, N = 1) )] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { - static_assert_uimm_bits!(LANE, 3); - unsafe { simd_extract!(a, LANE as u32) } -} -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13668,17 +13473,27 @@ pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_f32(v: float32x2_t) -> f32 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13687,17 +13502,22 @@ pub fn vget_lane_f32(v: float32x2_t) -> f32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_p16(v: poly16x4_t) -> p16 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13706,17 +13526,28 @@ pub fn vget_lane_p16(v: poly16x4_t) -> p16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_p8(v: poly8x8_t) -> p8 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13725,17 +13556,23 @@ pub fn vget_lane_p8(v: poly8x8_t) -> p8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s16(v: int16x4_t) -> i16 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub unsafe fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13744,17 +13581,22 @@ pub fn vget_lane_s16(v: int16x4_t) -> i16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s32(v: int32x2_t) -> i32 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13763,17 +13605,37 @@ pub fn vget_lane_s32(v: int32x2_t) -> i32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s8(v: int8x8_t) -> i8 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13782,17 +13644,40 @@ pub fn vget_lane_s8(v: int8x8_t) -> i8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_u16(v: uint16x4_t) -> u16 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13801,17 +13686,37 @@ pub fn vget_lane_u16(v: uint16x4_t) -> u16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_u32(v: uint32x2_t) -> u32 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13820,17 +13725,40 @@ pub fn vget_lane_u32(v: uint32x2_t) -> u32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_u8(v: uint8x8_t) -> u8 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13839,17 +13767,37 @@ pub fn vget_lane_u8(v: uint8x8_t) -> u8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13858,17 +13806,40 @@ pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13877,17 +13848,37 @@ pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13896,17 +13887,40 @@ pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { - static_assert_uimm_bits!(IMM5, 4); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13915,17 +13929,37 @@ pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13934,17 +13968,40 @@ pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13953,17 +14010,37 @@ pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { - static_assert_uimm_bits!(IMM5, 1); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -13972,55 +14049,123 @@ pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { - static_assert_uimm_bits!(IMM5, 4); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { - static_assert_uimm_bits!(IMM5, 3); - unsafe { simd_extract!(v, IMM5 as u32) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vextq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7 + ] + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14029,17 +14174,22 @@ pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { - static_assert_uimm_bits!(IMM5, 2); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14048,17 +14198,28 @@ pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { - static_assert_uimm_bits!(IMM5, 4); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14067,17 +14228,22 @@ pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_p64(v: poly64x1_t) -> p64 { - static_assert!(IMM5 == 0); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14086,17 +14252,28 @@ pub fn vget_lane_p64(v: poly64x1_t) -> p64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_s64(v: int64x1_t) -> i64 { - static_assert!(IMM5 == 0); - unsafe { simd_extract!(v, IMM5 as u32) } +pub fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Move vector element to general-purpose register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14105,16 +14282,22 @@ pub fn vget_lane_s64(v: int64x1_t) -> i64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_lane_u64(v: uint64x1_t) -> u64 { - static_assert!(IMM5 == 0); - unsafe { simd_extract!(v, 0) } +pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14123,15 +14306,28 @@ pub fn vget_lane_u64(v: uint64x1_t) -> u64 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { - unsafe { simd_shuffle!(a, a, [0, 1]) } +pub fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14140,15 +14336,22 @@ pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14157,15 +14360,28 @@ pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14174,15 +14390,22 @@ pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14191,15 +14414,28 @@ pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { - unsafe { simd_shuffle!(a, a, [0, 1]) } +pub fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14208,15 +14444,22 @@ pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14225,15 +14468,28 @@ pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } +pub fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = + simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14242,15 +14498,22 @@ pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, a, [0, 1]) } +pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14259,15 +14522,27 @@ pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14276,15 +14551,22 @@ pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { - unsafe { int64x1_t([simd_extract!(a, 0)]) } +pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14293,19 +14575,27 @@ pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { - unsafe { uint64x1_t([simd_extract!(a, 0)]) } +pub fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(ext, N = 15) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14314,27 +14604,45 @@ pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] - fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; +pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ) } - unsafe { _vhadd_s8(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(ext, N = 15) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14343,27 +14651,54 @@ pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] - fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; +pub fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - unsafe { _vhaddq_s8(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(ext, N = 15) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14372,27 +14707,45 @@ pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] - fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; +pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ) } - unsafe { _vhadd_s16(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(ext, N = 15) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14401,27 +14754,54 @@ pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] - fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; +pub fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - unsafe { _vhaddq_s16(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(ext, N = 15) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14430,27 +14810,45 @@ pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] - fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; +pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ) } - unsafe { _vhadd_s32(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"] +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shadd) + assert_instr(ext, N = 15) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -14459,84 +14857,95 @@ pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] - fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; +pub fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_shuffle!( + a, + b, + [ + N as u32, + N as u32 + 1, + N as u32 + 2, + N as u32 + 3, + N as u32 + 4, + N as u32 + 5, + N as u32 + 6, + N as u32 + 7, + N as u32 + 8, + N as u32 + 9, + N as u32 + 10, + N as u32 + 11, + N as u32 + 12, + N as u32 + 13, + N as u32 + 14, + N as u32 + 15 + ] + ); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } - unsafe { _vhaddq_s32(a, b) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"] +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmla) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] - fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vhadd_u8(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"] +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmla) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] - fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } - unsafe { _vhaddq_u8(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -14546,26 +14955,18 @@ pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] - fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vhadd_u16(a, b) } +pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -14575,26 +14976,18 @@ pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] - fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } - unsafe { _vhaddq_u16(a, b) } +pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_fma(b, c, a) } } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -14604,26 +14997,18 @@ pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] - fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vhadd_u32(a, b) } +pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfma_f32(a, b, vdup_n_f32_vfp4(c)) } -#[doc = "Halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"] +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhadd) + assert_instr(fmla) )] #[cfg_attr( not(target_arch = "arm"), @@ -14633,84 +15018,70 @@ pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] - fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } - unsafe { _vhaddq_u32(a, b) } +pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(fmls) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] - fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t { + unsafe { + let b: float16x4_t = simd_neg(b); + vfma_f16(a, b, c) } - unsafe { _vhsub_s16(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(fmls) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] - fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; +#[cfg(not(target_arch = "arm64ec"))] +pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t { + unsafe { + let b: float16x8_t = simd_neg(b); + vfmaq_f16(a, b, c) } - unsafe { _vhsubq_s16(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14720,26 +15091,21 @@ pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] - fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; +pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { + let b: float32x2_t = simd_neg(b); + vfma_f32(a, b, c) } - unsafe { _vhsub_s32(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"] +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14749,26 +15115,21 @@ pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] - fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; +pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { + let b: float32x4_t = simd_neg(b); + vfmaq_f32(a, b, c) } - unsafe { _vhsubq_s32(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"] +#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14778,26 +15139,18 @@ pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] - fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vhsub_s8(a, b) } +pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfms_f32(a, b, vdup_n_f32_vfp4(c)) } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"] +#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shsub) + assert_instr(fmls) )] #[cfg_attr( not(target_arch = "arm"), @@ -14807,142 +15160,107 @@ pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.shsub.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] - fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vhsubq_s8(a, b) } +pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] - fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vhsub_u8(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] - fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_high_f16(a: float16x8_t) -> float16x4_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vhsubq_u8(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] - fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vhsub_u16(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"] +#[doc = "Duplicate vector element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) -)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] - fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(nop))] +pub fn vget_low_f16(a: float16x8_t) -> float16x4_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vhsubq_u16(a, b) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14952,26 +15270,19 @@ pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] - fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vhsub_u32(a, b) } +pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } } -#[doc = "Signed halving subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uhsub) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -14981,66 +15292,45 @@ pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uhsub.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] - fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; +pub fn vget_high_f32(a: float32x4_t) -> float32x2_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vhsubq_u32(a, b) } } -#[doc = "Load one single-element structure and replicate to all lanes of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { - let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0))); - simd_shuffle!(x, x, [0, 0, 0, 0]) -} -#[doc = "Load one single-element structure and replicate to all lanes of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { - let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0))); - simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15050,20 +15340,23 @@ pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { - transmute(f32x2::splat(*ptr)) +pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15073,20 +15366,19 @@ pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { - transmute(u16x4::splat(*ptr)) +pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15096,20 +15388,24 @@ pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { - transmute(u8x8::splat(*ptr)) +pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15119,20 +15415,19 @@ pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { - transmute(i16x4::splat(*ptr)) +pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15142,20 +15437,23 @@ pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { - transmute(i32x2::splat(*ptr)) +pub fn vget_high_s16(a: int16x8_t) -> int16x4_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15165,20 +15463,19 @@ pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { - transmute(i8x8::splat(*ptr)) +pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15188,20 +15485,23 @@ pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { - transmute(u16x4::splat(*ptr)) +pub fn vget_high_s32(a: int32x4_t) -> int32x2_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15211,20 +15511,19 @@ pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { - transmute(u32x2::splat(*ptr)) +pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15234,20 +15533,24 @@ pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { - transmute(u8x8::splat(*ptr)) +pub fn vget_high_s8(a: int8x16_t) -> int8x8_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15257,20 +15560,19 @@ pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { - transmute(f32x4::splat(*ptr)) +pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15280,20 +15582,23 @@ pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { - transmute(u16x8::splat(*ptr)) +pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15303,20 +15608,19 @@ pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { - transmute(u8x16::splat(*ptr)) +pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [2, 3]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15326,20 +15630,23 @@ pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { - transmute(i16x8::splat(*ptr)) +pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15349,20 +15656,19 @@ pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { - transmute(i32x4::splat(*ptr)) +pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15372,20 +15678,24 @@ pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { - transmute(i64x2::splat(*ptr)) +pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15395,20 +15705,19 @@ pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { - transmute(i8x16::splat(*ptr)) +pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { transmute(u64x1::new(simd_extract!(a, 1))) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15418,20 +15727,22 @@ pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { - transmute(u16x8::splat(*ptr)) +pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + transmute(u64x1::new(simd_extract!(a, 1))) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15441,20 +15752,19 @@ pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { - transmute(u32x4::splat(*ptr)) +pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { + unsafe { int64x1_t([simd_extract!(a, 1)]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15464,20 +15774,22 @@ pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { - transmute(u64x2::splat(*ptr)) +pub fn vget_high_s64(a: int64x2_t) -> int64x1_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + int64x1_t([simd_extract!(a, 1)]) + } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1r) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15487,20 +15799,19 @@ pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { - transmute(u8x16::splat(*ptr)) +pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { uint64x1_t([simd_extract!(a, 1)]) } } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"] #[inline] -#[target_feature(enable = "neon,aes")] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr) + assert_instr(ext) )] #[cfg_attr( not(target_arch = "arm"), @@ -15510,30 +15821,122 @@ pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { - let x: poly64x1_t; - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - { - x = crate::core_arch::aarch64::vld1_p64(ptr); +pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + uint64x1_t([simd_extract!(a, 1)]) } - #[cfg(target_arch = "arm")] - { - x = crate::core_arch::arm::vld1_p64(ptr); - }; - x } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr) + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vget_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_extract!(a, LANE as u32) } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vget_lane_f16(a: float16x4_t) -> f16 { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_extract!(a, LANE as u32) + } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_extract!(a, LANE as u32) } +} +#[doc = "Duplicate vector element to scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vgetq_lane_f16(a: float16x8_t) -> f16 { + static_assert_uimm_bits!(LANE, 3); + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(a, LANE as u32) + } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] +pub fn vget_lane_f32(v: float32x2_t) -> f32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15542,30 +15945,41 @@ pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { - let x: int64x1_t; - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - { - x = crate::core_arch::aarch64::vld1_s64(ptr); +pub fn vget_lane_f32(v: float32x2_t) -> f32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: float32x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) } - #[cfg(target_arch = "arm")] - { - x = crate::core_arch::arm::vld1_s64(ptr); - }; - x } -#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] +pub fn vget_lane_p16(v: poly16x4_t) -> p16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } +} +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15574,407 +15988,236 @@ pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { - let x: uint64x1_t; - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - { - x = crate::core_arch::aarch64::vld1_u64(ptr); +pub fn vget_lane_p16(v: poly16x4_t) -> p16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: poly16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) } - #[cfg(target_arch = "arm")] - { - x = crate::core_arch::arm::vld1_u64(ptr); - }; - x } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] #[inline] #[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { - transmute(vld1_v4f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_p8(v: poly8x8_t) -> p8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"] #[inline] #[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { - let ret_val: float16x4_t = transmute(vld1_v4f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_p8(v: poly8x8_t) -> p8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: poly8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] #[inline] #[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { - transmute(vld1q_v8f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s16(v: int16x4_t) -> i16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"] #[inline] #[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { - let ret_val: float16x8_t = transmute(vld1q_v8f16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s16(v: int16x4_t) -> i16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: int16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s32(v: int32x2_t) -> i32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s32(v: int32x2_t) -> i32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: int32x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s8(v: int8x8_t) -> i8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_s8(v: int8x8_t) -> i8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: int8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u16(v: uint16x4_t) -> u16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vget_lane_u16(v: uint16x4_t) -> u16 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: uint16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v2f32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v4f32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v8i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v16i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v4i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v8i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v2i32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v4i32::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v1i64::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v2i64::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v8i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v16i8::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1_v4i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v8i16::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,aes")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - transmute(vld1q_v2i64::(ptr as *const i8)) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -15983,21 +16226,18 @@ pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u32(v: uint32x2_t) -> u32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16006,21 +16246,21 @@ pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u32(v: uint32x2_t) -> u32 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: uint32x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16029,21 +16269,18 @@ pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u8(v: uint8x8_t) -> u8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16052,21 +16289,21 @@ pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_lane_u8(v: uint8x8_t) -> u8 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: uint8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16075,21 +16312,18 @@ pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16098,62 +16332,41 @@ pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vgetq_lane_f32(v: float32x4_t) -> f32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: float32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 0) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1_lane_f16(ptr: *const f16, src: float16x4_t) -> float16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) -} -#[doc = "Load one single-element structure to one lane of one register"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 0) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) -> float16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16162,23 +16375,21 @@ pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p16(v: poly16x8_t) -> p16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: poly16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16187,23 +16398,18 @@ pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16212,23 +16418,21 @@ pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p64(v: poly64x2_t) -> p64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: poly64x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16237,23 +16441,18 @@ pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> p target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16262,23 +16461,22 @@ pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_p8(v: poly8x16_t) -> p8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { + let v: poly8x16_t = + simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr, LANE = 0) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16287,23 +16485,18 @@ pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { - static_assert!(LANE == 0); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16312,23 +16505,21 @@ pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s16(v: int16x8_t) -> i16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: int16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16337,23 +16528,18 @@ pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> in target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16362,23 +16548,21 @@ pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s32(v: int32x4_t) -> i32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: int32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr, LANE = 0) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16387,23 +16571,18 @@ pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { - static_assert!(LANE == 0); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16412,23 +16591,21 @@ pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s64(v: int64x2_t) -> i64 { + static_assert_uimm_bits!(IMM5, 1); + unsafe { + let v: int64x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16437,23 +16614,18 @@ pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> u target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16462,23 +16634,22 @@ pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_s8(v: int8x16_t) -> i8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { + let v: int8x16_t = + simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 15) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16487,23 +16658,18 @@ pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16512,23 +16678,21 @@ pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u16(v: uint16x8_t) -> u16 { + static_assert_uimm_bits!(IMM5, 3); + unsafe { + let v: uint16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16537,23 +16701,18 @@ pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16562,23 +16721,21 @@ pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u32(v: uint32x4_t) -> u32 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: uint32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 15) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16587,23 +16744,18 @@ pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 7) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16612,23 +16764,21 @@ pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u64(v: uint64x2_t) -> u64 { + static_assert_uimm_bits!(IMM5, 2); + unsafe { + let v: uint64x2_t = simd_shuffle!(v, v, [1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 3) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16637,23 +16787,18 @@ pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16662,23 +16807,21 @@ pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) +pub fn vgetq_lane_u8(v: uint8x16_t) -> u8 { + static_assert_uimm_bits!(IMM5, 4); + unsafe { + let v: uint8x16_t = + simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + simd_extract!(v, IMM5 as u32) + } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 15) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16687,23 +16830,17 @@ pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(src, LANE as u32, *ptr) +pub fn vget_lane_p64(v: poly64x1_t) -> p64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"] #[inline] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ldr, LANE = 0) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16712,23 +16849,17 @@ pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { - static_assert!(LANE == 0); - simd_insert!(src, LANE as u32, *ptr) +pub fn vget_lane_s64(v: int64x1_t) -> i64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, IMM5 as u32) } } -#[doc = "Load one single-element structure to one lane of one register."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Move vector element to general-purpose register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"] #[inline] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld1, LANE = 1) -)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16737,40 +16868,17 @@ pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) - target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(src, LANE as u32, *ptr) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,aes")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { - let a: *const i8 = ptr as *const i8; - let b: i32 = crate::mem::align_of::() as i32; - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] - fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; - } - transmute(_vld1_v1i64(a, b)) +pub fn vget_lane_u64(v: uint64x1_t) -> u64 { + static_assert!(IMM5 == 0); + unsafe { simd_extract!(v, 0) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16779,21 +16887,16 @@ pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16802,21 +16905,20 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_f32(a: float32x4_t) -> float32x2_t { + unsafe { + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16825,21 +16927,16 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16848,21 +16945,20 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16871,21 +16967,16 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -16894,125 +16985,21 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { + unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v8i8::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v16i8::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v4i16::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v8i16::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v2i32::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v4i32::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1_v1i64::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vld1q_v2i64::(ptr as *const i8) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17021,21 +17008,16 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17044,21 +17026,20 @@ pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s16(a: int16x8_t) -> int16x4_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17067,21 +17048,16 @@ pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17090,21 +17066,20 @@ pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s32(a: int32x4_t) -> int32x2_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17113,21 +17088,16 @@ pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17136,21 +17106,21 @@ pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s8(a: int8x16_t) -> int8x8_t { + unsafe { + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17159,21 +17129,16 @@ pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17182,21 +17147,20 @@ pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { + unsafe { + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17205,21 +17169,16 @@ pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [0, 1]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17228,21 +17187,20 @@ pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [0, 1]); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17251,21 +17209,16 @@ pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17274,21 +17227,21 @@ pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { + unsafe { + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17297,21 +17250,16 @@ pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { transmute(u64x1::new(simd_extract!(a, 0))) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17320,21 +17268,19 @@ pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { + unsafe { + let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); + transmute(u64x1::new(simd_extract!(a, 0))) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17343,21 +17289,16 @@ pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { + unsafe { int64x1_t([simd_extract!(a, 0)]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17366,21 +17307,19 @@ pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_s64(a: int64x2_t) -> int64x1_t { + unsafe { + let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); + int64x1_t([simd_extract!(a, 0)]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17389,21 +17328,16 @@ pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { uint64x1_t([simd_extract!(a, 0)]) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] +#[cfg_attr(test, assert_instr(nop))] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -17412,20 +17346,21 @@ pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { + unsafe { + let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); + uint64x1_t([simd_extract!(a, 0)]) + } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17435,20 +17370,26 @@ pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] + fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vhadd_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17458,20 +17399,26 @@ pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] + fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vhaddq_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17481,20 +17428,26 @@ pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] + fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vhadd_s16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17504,20 +17457,26 @@ pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] + fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vhaddq_s16(a, b) } +} +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17527,20 +17486,26 @@ pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] + fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vhadd_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17550,20 +17515,26 @@ pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] + fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vhaddq_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17573,20 +17544,26 @@ pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] + fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vhadd_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17596,20 +17573,26 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] + fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vhaddq_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17619,20 +17602,26 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] + fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vhadd_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17642,20 +17631,26 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] + fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vhaddq_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17665,20 +17660,26 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] + fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vhadd_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -17688,20 +17689,26 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] + fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vhaddq_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17711,20 +17718,26 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vhsub_s16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17734,20 +17747,26 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vhsubq_s16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17757,20 +17776,26 @@ pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vhsub_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17780,20 +17805,26 @@ pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vhsubq_s32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17803,20 +17834,26 @@ pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vhsub_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(shsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17826,20 +17863,26 @@ pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] + fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vhsubq_s8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17849,20 +17892,26 @@ pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] + fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vhsub_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17872,20 +17921,26 @@ pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] + fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } + unsafe { _vhsubq_u8(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17895,20 +17950,26 @@ pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] + fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vhsub_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17918,20 +17979,26 @@ pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] + fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } + unsafe { _vhsubq_u16(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17941,20 +18008,26 @@ pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] + fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vhsub_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(uhsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -17964,43 +18037,66 @@ pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] + fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } + unsafe { _vhsubq_u32(a, b) } } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(ld1r) )] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t { + let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0]) +} +#[doc = "Load one single-element structure and replicate to all lanes of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) )] -pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { - crate::ptr::read_unaligned(a.cast()) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t { + let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0))); + simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18010,20 +18106,20 @@ pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { + transmute(f32x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18033,20 +18129,20 @@ pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] +pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { + transmute(u16x4::splat(*ptr)) +} +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18056,20 +18152,20 @@ pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { + transmute(u8x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18079,20 +18175,20 @@ pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { + transmute(i16x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18102,20 +18198,20 @@ pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { + transmute(i32x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18125,20 +18221,20 @@ pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { + transmute(i8x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18148,20 +18244,20 @@ pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { + transmute(u16x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18171,20 +18267,20 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { + transmute(u32x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18194,20 +18290,20 @@ pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { + transmute(u8x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18217,20 +18313,20 @@ pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { + transmute(f32x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18240,20 +18336,20 @@ pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { + transmute(u16x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18263,20 +18359,20 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { + transmute(u8x16::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18286,20 +18382,20 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { + transmute(i16x8::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18309,20 +18405,20 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { + transmute(i32x4::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18332,20 +18428,20 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { + transmute(i64x2::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18355,20 +18451,20 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { - crate::ptr::read_unaligned(a.cast()) +pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { + transmute(i8x16::splat(*ptr)) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld) + assert_instr(ld1r) )] #[cfg_attr( not(target_arch = "arm"), @@ -18378,175 +18474,17 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v1i64(a: *const i8) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] - fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; - } - _vld1_v1i64(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2f32(a: *const i8) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")] - fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; - } - _vld1_v2f32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2i32(a: *const i8) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] - fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; - } - _vld1_v2i32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v4i16(a: *const i8) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] - fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; - } - _vld1_v4i16(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v8i8(a: *const i8) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")] - fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; - } - _vld1_v8i8(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v16i8(a: *const i8) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] - fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; - } - _vld1q_v16i8(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v2i64(a: *const i8) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] - fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; - } - _vld1q_v2i64(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4f32(a: *const i8) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")] - fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; - } - _vld1q_v4f32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4i32(a: *const i8) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] - fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; - } - _vld1q_v4i32(a, ALIGN) -} -#[inline] -#[rustc_legacy_const_generics(1)] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v8i16(a: *const i8) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] - fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; - } - _vld1q_v8i16(a, ALIGN) -} -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] - fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; - } - _vld1_v4f16(a, b) -} -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")] - fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t; - } - _vld1q_v8f16(a, b) +pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { + transmute(u16x8::splat(*ptr)) } #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1r) @@ -18559,387 +18497,499 @@ unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t { - let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0))); - simd_shuffle!(x, x, [0, 0]) +pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { + transmute(u32x4::splat(*ptr)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")] - fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; - } - _vld2_dup_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { + transmute(u64x2::splat(*ptr)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")] - fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; - } - _vld2q_dup_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { + transmute(u8x16::splat(*ptr)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ldr) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4f16.p0" - )] - fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { + let x: poly64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_p64(ptr); } - _vld2_dup_f16(a as _) + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_p64(ptr); + }; + x } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ldr) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v8f16.p0" - )] - fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; - } - _vld2q_dup_f16(a as _) -} -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")] - fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t; +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { + let x: int64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_s64(ptr); } - _vld2_dup_f32(a as *const i8, 4) + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_s64(ptr); + }; + x } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")] - fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { + let x: uint64x1_t; + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + { + x = crate::core_arch::aarch64::vld1_u64(ptr); } - _vld2q_dup_f32(a as *const i8, 4) + #[cfg(target_arch = "arm")] + { + x = crate::core_arch::arm::vld1_u64(ptr); + }; + x } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")] - fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t; - } - _vld2_dup_s8(a as *const i8, 1) +#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t { + transmute(vld1_v4f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t { + transmute(vld1q_v8f16( + ptr as *const i8, + crate::mem::align_of::() as i32, + )) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")] - fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t; - } - _vld2q_dup_s8(a as *const i8, 1) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2f32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")] - fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t; - } - _vld2_dup_s16(a as *const i8, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4f32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")] - fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t; - } - _vld2q_dup_s16(a as *const i8, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")] - fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t; - } - _vld2_dup_s32(a as *const i8, 4) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")] - fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t; - } - _vld2q_dup_s32(a as *const i8, 4) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v2f32.p0" - )] - fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t; - } - _vld2_dup_f32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2i32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4f32.p0" - )] - fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t; - } - _vld2q_dup_f32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4i32::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v8i8.p0" - )] - fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t; - } - _vld2_dup_s8(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v1i64::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v16i8.p0" - )] - fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t; - } - _vld2q_dup_s8(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4i16.p0" - )] - fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t; - } - _vld2_dup_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v8i16.p0" - )] - fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t; - } - _vld2q_dup_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v2i32.p0" - )] - fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t; - } - _vld2_dup_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v4i32.p0" - )] - fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t; - } - _vld2q_dup_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) +} +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -18949,55 +18999,43 @@ pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { - transmute(vld2_dup_s64(transmute(a))) +pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")] - fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t; - } - _vld2_dup_s64(a as *const i8, 8) -} -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2r))] -pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2r.v1i64.p0" - )] - fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t; - } - _vld2_dup_s64(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -19007,21 +19045,20 @@ pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { - transmute(vld2_dup_s64(transmute(a))) +pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -19031,21 +19068,20 @@ pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { - transmute(vld2_dup_s8(transmute(a))) +pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -19055,24 +19091,20 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { - let mut ret_val: uint8x8x2_t = transmute(vld2_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -19082,21 +19114,61 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { - transmute(vld2q_dup_s8(transmute(a))) +pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1_lane_f16(ptr: *const f16, src: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld1q_lane_f16(ptr: *const f16, src: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -19106,32 +19178,22 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { - let mut ret_val: uint8x16x2_t = transmute(vld2q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -19141,21 +19203,22 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { - transmute(vld2_dup_s16(transmute(a))) +pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19165,24 +19228,22 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { - let mut ret_val: uint16x4x2_t = transmute(vld2_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -19192,21 +19253,22 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { - transmute(vld2q_dup_s16(transmute(a))) +pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -19216,24 +19278,22 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { - let mut ret_val: uint16x8x2_t = transmute(vld2q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ldr, LANE = 0) )] #[cfg_attr( not(target_arch = "arm"), @@ -19243,21 +19303,22 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { - transmute(vld2_dup_s32(transmute(a))) +pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19267,24 +19328,22 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { - let mut ret_val: uint32x2x2_t = transmute(vld2_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val +pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -19294,21 +19353,22 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { - transmute(vld2q_dup_s32(transmute(a))) +pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -19318,24 +19378,22 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { - let mut ret_val: uint32x4x2_t = transmute(vld2q_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ldr, LANE = 0) )] #[cfg_attr( not(target_arch = "arm"), @@ -19345,21 +19403,22 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { - transmute(vld2_dup_s8(transmute(a))) +pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19369,24 +19428,22 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { - let mut ret_val: poly8x8x2_t = transmute(vld2_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -19396,21 +19453,22 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { - transmute(vld2q_dup_s8(transmute(a))) +pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19420,32 +19478,22 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { - let mut ret_val: poly8x16x2_t = transmute(vld2q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 15) )] #[cfg_attr( not(target_arch = "arm"), @@ -19455,21 +19503,22 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { - transmute(vld2_dup_s16(transmute(a))) +pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 7) )] #[cfg_attr( not(target_arch = "arm"), @@ -19479,24 +19528,22 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { - let mut ret_val: poly16x4x2_t = transmute(vld2_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 3) )] #[cfg_attr( not(target_arch = "arm"), @@ -19506,21 +19553,22 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { - transmute(vld2q_dup_s16(transmute(a))) +pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2r) + assert_instr(ld1, LANE = 1) )] #[cfg_attr( not(target_arch = "arm"), @@ -19530,812 +19578,526 @@ pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { - let mut ret_val: poly16x8x2_t = transmute(vld2q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")] - fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; - } - _vld2_f16(a as _, 2) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 15) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")] - fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; - } - _vld2q_f16(a as _, 2) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 7) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld1, LANE = 3) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4f16.p0" - )] - fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; - } - _vld2_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld1, LANE = 1) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v8f16.p0" - )] - fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; - } - _vld2q_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")] - fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t; - } - _vld2_f32(a as *const i8, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 15) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(src, LANE as u32, *ptr) +} +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")] - fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t; - } - _vld2q_f32(a as *const i8, 4) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ldr, LANE = 0) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] +#[doc = "Load one single-element structure to one lane of one register."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")] - fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t; - } - _vld2_s8(a as *const i8, 1) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1, LANE = 1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(src, LANE as u32, *ptr) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,aes")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t { + let a: *const i8 = ptr as *const i8; + let b: i32 = crate::mem::align_of::() as i32; unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")] - fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] + fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; } - _vld2q_s8(a as *const i8, 1) + transmute(_vld1_v1i64(a, b)) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")] - fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t; - } - _vld2_s16(a as *const i8, 2) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")] - fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t; - } - _vld2q_s16(a as *const i8, 2) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")] - fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t; - } - _vld2_s32(a as *const i8, 4) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld2))] -pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")] - fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t; - } - _vld2q_s32(a as *const i8, 4) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v2f32.p0" - )] - fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t; - } - _vld2_f32(a as _) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4f32.p0" - )] - fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t; - } - _vld2q_f32(a as _) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v8i8.p0" - )] - fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t; - } - _vld2_s8(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v8i8::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v16i8.p0" - )] - fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t; - } - _vld2q_s8(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v16i8::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4i16.p0" - )] - fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t; - } - _vld2_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v4i16::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v8i16.p0" - )] - fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t; - } - _vld2q_s16(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v8i16::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v2i32.p0" - )] - fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t; - } - _vld2_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v2i32::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld2))] -pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v4i32.p0" - )] - fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t; - } - _vld2q_s32(a as _) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v4i32::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] - fn _vld2_lane_f16( - ptr: *const f16, - a: float16x4_t, - b: float16x4_t, - n: i32, - size: i32, - ) -> float16x4x2_t; - } - _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v1i64::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] - fn _vld2q_lane_f16( - ptr: *const f16, - a: float16x8_t, - b: float16x8_t, - n: i32, - size: i32, - ) -> float16x8x2_t; - } - _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] +pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v2i64::(ptr as *const i8) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" - )] - fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) - -> float16x4x2_t; - } - _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" - )] - fn _vld2q_lane_f16( - a: float16x8_t, - b: float16x8_t, - n: i64, - ptr: *const f16, - ) -> float16x8x2_t; - } - _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0" - )] - fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; - } - _vld2_lane_f32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0" - )] - fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) - -> float32x4x2_t; - } - _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0" - )] - fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; - } - _vld2_lane_s8(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0" - )] - fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; - } - _vld2_lane_s16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0" - )] - fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; - } - _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0" - )] - fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; - } - _vld2_lane_s32(b.0, b.1, LANE as i64, a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0" - )] - fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; - } - _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")] - fn _vld2_lane_f32( - ptr: *const i8, - a: float32x2_t, - b: float32x2_t, - n: i32, - size: i32, - ) -> float32x2x2_t; - } - _vld2_lane_f32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")] - fn _vld2q_lane_f32( - ptr: *const i8, - a: float32x4_t, - b: float32x4_t, - n: i32, - size: i32, - ) -> float32x4x2_t; - } - _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")] - fn _vld2q_lane_s16( - ptr: *const i8, - a: int16x8_t, - b: int16x8_t, - n: i32, - size: i32, - ) -> int16x8x2_t; - } - _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")] - fn _vld2q_lane_s32( - ptr: *const i8, - a: int32x4_t, - b: int32x4_t, - n: i32, - size: i32, - ) -> int32x4x2_t; - } - _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")] - fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) - -> int8x8x2_t; - } - _vld2_lane_s8(a as _, b.0, b.1, LANE, 1) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")] - fn _vld2_lane_s16( - ptr: *const i8, - a: int16x4_t, - b: int16x4_t, - n: i32, - size: i32, - ) -> int16x4x2_t; - } - _vld2_lane_s16(a as _, b.0, b.1, LANE, 2) -} -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")] - fn _vld2_lane_s32( - ptr: *const i8, - a: int32x2_t, - b: int32x2_t, - n: i32, - size: i32, - ) -> int32x2x2_t; - } - _vld2_lane_s32(a as _, b.0, b.1, LANE, 4) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20344,23 +20106,21 @@ pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> i target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20369,23 +20129,21 @@ pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uin target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld2_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20394,23 +20152,21 @@ pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20419,23 +20175,21 @@ pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld2_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20444,23 +20198,21 @@ pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld2q_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20469,23 +20221,21 @@ pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20494,23 +20244,21 @@ pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> pol target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld2_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2, LANE = 0) + assert_instr(ld) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -20519,21 +20267,20 @@ pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld2q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20543,55 +20290,66 @@ pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { - transmute(vld2_s64(transmute(a))) +pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")] - fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t; - } - _vld2_s64(a as *const i8, 8) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld2.v1i64.p0" - )] - fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t; - } - _vld2_s64(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20601,20 +20359,20 @@ pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { - transmute(vld2_s64(transmute(a))) +pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20624,20 +20382,20 @@ pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { - transmute(vld2_s8(transmute(a))) +pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20647,20 +20405,20 @@ pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { - transmute(vld2q_s8(transmute(a))) +pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20670,20 +20428,20 @@ pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { - transmute(vld2_s16(transmute(a))) +pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20693,20 +20451,20 @@ pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { - transmute(vld2q_s16(transmute(a))) +pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20716,20 +20474,20 @@ pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { - transmute(vld2_s32(transmute(a))) +pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20739,20 +20497,20 @@ pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { - transmute(vld2q_s32(transmute(a))) +pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20762,20 +20520,20 @@ pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { - transmute(vld2_s8(transmute(a))) +pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20785,20 +20543,20 @@ pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { - transmute(vld2q_s8(transmute(a))) +pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20808,20 +20566,20 @@ pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { - transmute(vld2_s16(transmute(a))) +pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 2-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld2) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -20831,405 +20589,296 @@ pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { - transmute(vld2q_s16(transmute(a))) +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")] - fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; - } - _vld3_dup_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")] - fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t; - } - _vld3q_dup_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4f16.p0" - )] - fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t; - } - _vld3_dup_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v8f16.p0" - )] - fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; - } - _vld3q_dup_f16(a as _) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v2f32.p0" - )] - fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t; - } - _vld3_dup_f32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4f32.p0" - )] - fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t; - } - _vld3q_dup_f32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v8i8.p0" - )] - fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t; - } - _vld3_dup_s8(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v16i8.p0" - )] - fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t; - } - _vld3q_dup_s8(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4i16.p0" - )] - fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t; - } - _vld3_dup_s16(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v8i16.p0" - )] - fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t; - } - _vld3q_dup_s16(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v2i32.p0" - )] - fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t; - } - _vld3_dup_s32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v4i32.p0" - )] - fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t; - } - _vld3q_dup_s32(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld3r))] -pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3r.v1i64.p0" - )] - fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t; - } - _vld3_dup_s64(a as _) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")] - fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t; - } - _vld3_dup_f32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")] - fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t; - } - _vld3q_dup_f32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")] - fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t; - } - _vld3_dup_s8(a as *const i8, 1) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")] - fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t; - } - _vld3q_dup_s8(a as *const i8, 1) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")] - fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t; - } - _vld3_dup_s16(a as *const i8, 2) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")] - fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t; - } - _vld3q_dup_s16(a as *const i8, 2) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")] - fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t; - } - _vld3_dup_s32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")] - fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t; - } - _vld3q_dup_s32(a as *const i8, 4) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21239,36 +20888,20 @@ pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { - transmute(vld3_dup_s64(transmute(a))) -} -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")] - fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t; - } - _vld3_dup_s64(a as *const i8, 8) +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21278,21 +20911,20 @@ pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { - transmute(vld3_dup_s64(transmute(a))) +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21302,21 +20934,20 @@ pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { - transmute(vld3_dup_s8(transmute(a))) +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21326,25 +20957,20 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { - let mut ret_val: uint8x8x3_t = transmute(vld3_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21354,21 +20980,20 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { - transmute(vld3q_dup_s8(transmute(a))) +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21378,37 +21003,20 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { - let mut ret_val: uint8x16x3_t = transmute(vld3q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21418,21 +21026,20 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { - transmute(vld3_dup_s16(transmute(a))) +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21442,25 +21049,20 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { - let mut ret_val: uint16x4x3_t = transmute(vld3_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21470,21 +21072,20 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { - transmute(vld3q_dup_s16(transmute(a))) +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21494,25 +21095,20 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { - let mut ret_val: uint16x8x3_t = transmute(vld3q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21522,21 +21118,20 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { - transmute(vld3_dup_s32(transmute(a))) +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21546,25 +21141,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { - let mut ret_val: uint32x2x3_t = transmute(vld3_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21574,21 +21164,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { - transmute(vld3q_dup_s32(transmute(a))) +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21598,25 +21187,20 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { - let mut ret_val: uint32x4x3_t = transmute(vld3q_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21626,21 +21210,20 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { - transmute(vld3_dup_s8(transmute(a))) +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21650,25 +21233,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { - let mut ret_val: poly8x8x3_t = transmute(vld3_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21678,21 +21256,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { - transmute(vld3q_dup_s8(transmute(a))) +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21702,37 +21279,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { - let mut ret_val: poly8x16x3_t = transmute(vld3q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21742,21 +21302,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { - transmute(vld3_dup_s16(transmute(a))) +pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21766,25 +21325,20 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { - let mut ret_val: poly16x4x3_t = transmute(vld3_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21794,21 +21348,20 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { - transmute(vld3q_dup_s16(transmute(a))) +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3r) + assert_instr(ld) )] #[cfg_attr( not(target_arch = "arm"), @@ -21818,365 +21371,254 @@ pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { - let mut ret_val: poly16x8x3_t = transmute(vld3q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")] - fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; - } - _vld3_f16(a as _, 2) -} -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")] - fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; - } - _vld3q_f16(a as _, 2) -} -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + assert_instr(ld) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { - crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a) -} -#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { - crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { - crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { - crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { - crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { - crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { - crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v1i64(a: *const i8) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] + fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; + } + _vld1_v1i64(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { - crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v2f32(a: *const i8) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")] + fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; + } + _vld1_v2f32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { - crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v2i32(a: *const i8) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] + fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; + } + _vld1_v2i32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3))] -pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { - crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a) +#[rustc_legacy_const_generics(1)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +unsafe fn vld1_v4i16(a: *const i8) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] + fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; + } + _vld1_v4i16(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { +unsafe fn vld1_v8i8(a: *const i8) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")] - fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")] + fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; } - _vld3_f32(a as *const i8, 4) + _vld1_v8i8(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { +unsafe fn vld1q_v16i8(a: *const i8) -> int8x16_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")] - fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] + fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; } - _vld3q_f32(a as *const i8, 4) + _vld1q_v16i8(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { +unsafe fn vld1q_v2i64(a: *const i8) -> int64x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")] - fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] + fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; } - _vld3_s8(a as *const i8, 1) + _vld1q_v2i64(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { +unsafe fn vld1q_v4f32(a: *const i8) -> float32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")] - fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")] + fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; } - _vld3q_s8(a as *const i8, 1) + _vld1q_v4f32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { +unsafe fn vld1q_v4i32(a: *const i8) -> int32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")] - fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] + fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; } - _vld3_s16(a as *const i8, 2) + _vld1q_v4i32(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { +unsafe fn vld1q_v8i16(a: *const i8) -> int16x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")] - fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] + fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; } - _vld3q_s16(a as *const i8, 2) + _vld1q_v8i16(a, ALIGN) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { +#[cfg(not(target_arch = "arm64ec"))] +unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")] - fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")] + fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t; } - _vld3_s32(a as *const i8, 4) + _vld1_v4f16(a, b) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld3))] -pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { +#[cfg(not(target_arch = "arm64ec"))] +unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")] - fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")] + fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t; } - _vld3q_s32(a as *const i8, 4) + _vld1q_v8f16(a, b) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t { + let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0))); + simd_shuffle!(x, x, [0, 0]) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] - fn _vld3_lane_f16( - ptr: *const f16, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - n: i32, - size: i32, - ) -> float16x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")] + fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t; } - _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2_dup_f16(a as _, 2) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] - fn _vld3q_lane_f16( - ptr: *const f16, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - n: i32, - size: i32, - ) -> float16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")] + fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t; } - _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2q_dup_f16(a as _, 2) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] @@ -22184,31 +21626,23 @@ pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) - #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" + link_name = "llvm.aarch64.neon.ld2r.v4f16.p0" )] - fn _vld3_lane_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - n: i64, - ptr: *const f16, - ) -> float16x4x3_t; + fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t; } - _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_f16(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] @@ -22216,381 +21650,371 @@ pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" + link_name = "llvm.aarch64.neon.ld2r.v8f16.p0" )] - fn _vld3q_lane_f16( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - n: i64, - ptr: *const f16, - ) -> float16x8x3_t; + fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t; } - _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_f16(a as _) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0" - )] - fn _vld3_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - n: i64, - ptr: *const i8, - ) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")] + fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t; } - _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0" - )] - fn _vld3q_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - n: i64, - ptr: *const i8, - ) -> float32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")] + fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t; } - _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")] - fn _vld3_lane_f32( - ptr: *const i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - n: i32, - size: i32, - ) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")] + fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t; } - _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) + _vld2_dup_s8(a as *const i8, 1) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")] + fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t; + } + _vld2q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")] + fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t; + } + _vld2_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")] + fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t; + } + _vld2q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")] + fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t; + } + _vld2_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")] + fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t; + } + _vld2q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0" + link_name = "llvm.aarch64.neon.ld2r.v2f32.p0" )] - fn _vld3_lane_s8( - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - n: i64, - ptr: *const i8, - ) -> int8x8x3_t; + fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t; } - _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_f32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0" + link_name = "llvm.aarch64.neon.ld2r.v4f32.p0" )] - fn _vld3_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - n: i64, - ptr: *const i8, - ) -> int16x4x3_t; + fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t; } - _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_f32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { - static_assert_uimm_bits!(LANE, 4); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0" + link_name = "llvm.aarch64.neon.ld2r.v8i8.p0" )] - fn _vld3q_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - n: i64, - ptr: *const i8, - ) -> int16x8x3_t; + fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t; } - _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_s8(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0" + link_name = "llvm.aarch64.neon.ld2r.v16i8.p0" )] - fn _vld3_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - n: i64, - ptr: *const i8, - ) -> int32x2x3_t; + fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t; } - _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2q_dup_s8(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0" + link_name = "llvm.aarch64.neon.ld2r.v4i16.p0" )] - fn _vld3q_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - n: i64, - ptr: *const i8, - ) -> int32x4x3_t; + fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t; } - _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) + _vld2_dup_s16(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { - static_assert_uimm_bits!(LANE, 3); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")] - fn _vld3_lane_s8( - ptr: *const i8, - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - n: i32, - size: i32, - ) -> int8x8x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8i16.p0" + )] + fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t; } - _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) + _vld2q_dup_s16(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")] - fn _vld3_lane_s16( - ptr: *const i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - n: i32, - size: i32, - ) -> int16x4x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2i32.p0" + )] + fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t; } - _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2_dup_s32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { - static_assert_uimm_bits!(LANE, 3); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")] - fn _vld3q_lane_s16( - ptr: *const i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - n: i32, - size: i32, - ) -> int16x8x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4i32.p0" + )] + fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t; } - _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) + _vld2q_dup_s32(a as _) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) +} +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")] - fn _vld3_lane_s32( - ptr: *const i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - n: i32, - size: i32, - ) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")] + fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t; } - _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) + _vld2_dup_s64(a as *const i8, 8) } -#[doc = "Load multiple 3-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { - static_assert_uimm_bits!(LANE, 2); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")] - fn _vld3q_lane_s32( - ptr: *const i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - n: i32, - size: i32, - ) -> int32x4x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v1i64.p0" + )] + fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t; } - _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) + _vld2_dup_s64(a as _) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22599,23 +22023,21 @@ pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22624,23 +22046,21 @@ pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uin target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22649,23 +22069,21 @@ pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22674,23 +22092,21 @@ pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld3_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22699,23 +22115,21 @@ pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3q_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22724,23 +22138,21 @@ pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_dup_s32(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22749,23 +22161,21 @@ pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> pol target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_dup_s32(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3, LANE = 0) + assert_instr(ld2r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -22774,21 +22184,20 @@ pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld2r) )] #[cfg_attr( not(target_arch = "arm"), @@ -22798,232 +22207,20 @@ pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { - transmute(vld3_s64(transmute(a))) +pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")] - fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t; - } - _vld3_s64(a as *const i8, 8) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { - transmute(vld3_s64(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { - transmute(vld3_s8(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { - transmute(vld3q_s8(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { - transmute(vld3_s16(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { - transmute(vld3q_s16(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { - transmute(vld3_s32(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { - transmute(vld3q_s32(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { - transmute(vld3_s8(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + assert_instr(ld2r) )] #[cfg_attr( not(target_arch = "arm"), @@ -23033,20 +22230,20 @@ pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { - transmute(vld3q_s8(transmute(a))) +pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) + assert_instr(ld2r) )] #[cfg_attr( not(target_arch = "arm"), @@ -23056,765 +22253,809 @@ pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { - transmute(vld3_s16(transmute(a))) +pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) } -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { - transmute(vld3q_s16(transmute(a))) -} -#[doc = "Load multiple 3-element structures to three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")] - fn _vld3q_lane_f32( - ptr: *const i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - n: i32, - size: i32, - ) -> float32x4x3_t; - } - _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")] - fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")] + fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t; } - _vld4_dup_f16(a as _, 2) + _vld2_f16(a as _, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")] - fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")] + fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t; } - _vld4q_dup_f16(a as _, 2) + _vld2q_f16(a as _, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { +pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4f16.p0" + link_name = "llvm.aarch64.neon.ld2.v4f16.p0" )] - fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; + fn _vld2_f16(ptr: *const f16) -> float16x4x2_t; } - _vld4_dup_f16(a as _) + _vld2_f16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { +pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v8f16.p0" + link_name = "llvm.aarch64.neon.ld2.v8f16.p0" )] - fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; + fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t; } - _vld4q_dup_f16(a as _) + _vld2q_f16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")] - fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")] + fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t; } - _vld4_dup_f32(a as *const i8, 4) + _vld2_f32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")] - fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")] + fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t; } - _vld4q_dup_f32(a as *const i8, 4) + _vld2q_f32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")] - fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")] + fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t; } - _vld4_dup_s8(a as *const i8, 1) + _vld2_s8(a as *const i8, 1) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")] - fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")] + fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t; } - _vld4q_dup_s8(a as *const i8, 1) + _vld2q_s8(a as *const i8, 1) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")] - fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")] + fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t; } - _vld4_dup_s16(a as *const i8, 2) + _vld2_s16(a as *const i8, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")] - fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")] + fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t; } - _vld4q_dup_s16(a as *const i8, 2) + _vld2q_s16(a as *const i8, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")] - fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")] + fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t; } - _vld4_dup_s32(a as *const i8, 4) + _vld2_s32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")] - fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")] + fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t; } - _vld4q_dup_s32(a as *const i8, 4) + _vld2q_s32(a as *const i8, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v2f32.p0" )] - fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t; + fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t; } - _vld4_dup_f32(a as _) + _vld2_f32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v4f32.p0" )] - fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t; + fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t; } - _vld4q_dup_f32(a as _) + _vld2q_f32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v8i8.p0" )] - fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t; + fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t; } - _vld4_dup_s8(a as _) + _vld2_s8(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v16i8.p0" )] - fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t; + fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t; } - _vld4q_dup_s8(a as _) + _vld2q_s8(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v4i16.p0" )] - fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t; + fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t; } - _vld4_dup_s16(a as _) + _vld2_s16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v8i16.p0" )] - fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t; + fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t; } - _vld4q_dup_s16(a as _) + _vld2q_s16(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v2i32.p0" )] - fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t; + fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t; } - _vld4_dup_s32(a as _) + _vld2_s32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0" + link_name = "llvm.aarch64.neon.ld2.v4i32.p0" )] - fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t; + fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t; } - _vld4q_dup_s32(a as _) + _vld2q_s32(a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0" - )] - fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")] + fn _vld2_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x2_t; } - _vld4_dup_s64(a as _) -} -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { - transmute(vld4_dup_s64(transmute(a))) + _vld2_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")] - fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")] + fn _vld2q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x2_t; } - _vld4_dup_s64(a as *const i8, 8) + _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld2, LANE = 0) )] -pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t { - transmute(vld4_dup_s64(transmute(a))) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2_lane_f16(a: *const f16, b: float16x4x2_t) -> float16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0" + )] + fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16) + -> float16x4x2_t; + } + _vld2_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld2, LANE = 0) )] -pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { - transmute(vld4_dup_s8(transmute(a))) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld2q_lane_f16(a: *const f16, b: float16x8x2_t) -> float16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0" + )] + fn _vld2q_lane_f16( + a: float16x8_t, + b: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x2_t; + } + _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { - let mut ret_val: uint8x8x4_t = transmute(vld4_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0" + )] + fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; + } + _vld2_lane_f32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { - transmute(vld4q_dup_s8(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0" + )] + fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) + -> float32x4x2_t; + } + _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { - let mut ret_val: uint8x16x4_t = transmute(vld4q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.3 = simd_shuffle!( - ret_val.3, - ret_val.3, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0" + )] + fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; + } + _vld2_lane_s8(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { - transmute(vld4_dup_s16(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0" + )] + fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; + } + _vld2_lane_s16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { - let mut ret_val: uint16x4x4_t = transmute(vld4_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0" + )] + fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; + } + _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { - transmute(vld4q_dup_s16(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0" + )] + fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; + } + _vld2_lane_s32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { - let mut ret_val: uint16x8x4_t = transmute(vld4q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0" + )] + fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; + } + _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { - transmute(vld4_dup_s32(transmute(a))) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")] + fn _vld2_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x2_t; + } + _vld2_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")] + fn _vld2q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x2_t; + } + _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")] + fn _vld2q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x2_t; + } + _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")] + fn _vld2q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x2_t; + } + _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")] + fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) + -> int8x8x2_t; + } + _vld2_lane_s8(a as _, b.0, b.1, LANE, 1) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")] + fn _vld2_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x2_t; + } + _vld2_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")] + fn _vld2_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x2_t; + } + _vld2_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23823,27 +23064,23 @@ pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { - let mut ret_val: uint32x2x4_t = transmute(vld4_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]); - ret_val +pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23852,22 +23089,23 @@ pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { - transmute(vld4q_dup_s32(transmute(a))) +pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23876,27 +23114,23 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { - let mut ret_val: uint32x4x4_t = transmute(vld4q_dup_s32(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23905,22 +23139,23 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { - transmute(vld4_dup_s8(transmute(a))) +pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld2_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23929,27 +23164,23 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { - let mut ret_val: poly8x8x4_t = transmute(vld4_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2q_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23958,22 +23189,23 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { - transmute(vld4q_dup_s8(transmute(a))) +pub unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -23982,43 +23214,23 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { - let mut ret_val: poly8x16x4_t = transmute(vld4q_dup_s8(transmute(a))); - ret_val.0 = simd_shuffle!( - ret_val.0, - ret_val.0, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.1 = simd_shuffle!( - ret_val.1, - ret_val.1, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.2 = simd_shuffle!( - ret_val.2, - ret_val.2, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val.3 = simd_shuffle!( - ret_val.3, - ret_val.3, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ); - ret_val +pub unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24027,21 +23239,21 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { - transmute(vld4_dup_s16(transmute(a))) +pub unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -24051,26 +23263,55 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { - let mut ret_val: poly16x4x4_t = transmute(vld4_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]); - ret_val +pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_s64(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")] + fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t; + } + _vld2_s64(a as *const i8, 8) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v1i64.p0" + )] + fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t; + } + _vld2_s64(a as _) +} +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -24080,21 +23321,20 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { - transmute(vld4q_dup_s16(transmute(a))) +pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_s64(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4r) + assert_instr(ld2) )] #[cfg_attr( not(target_arch = "arm"), @@ -24104,817 +23344,613 @@ pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { - let mut ret_val: poly16x8x4_t = transmute(vld4q_dup_s16(transmute(a))); - ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]); - ret_val +pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_s8(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")] - fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; - } - _vld4_f16(a as _, 2) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_s8(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")] - fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; - } - _vld4q_f16(a as _, 2) -} -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(not(target_arch = "arm"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { - crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_s16(transmute(a))) } -#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld2) )] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { - crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { - crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { - crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { - crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { - crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { - crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { - crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { - crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(ld4))] -pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { - crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")] - fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t; - } - _vld4_f32(a as *const i8, 4) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")] - fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t; - } - _vld4q_f32(a as *const i8, 4) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")] - fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t; - } - _vld4_s8(a as *const i8, 1) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")] - fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t; - } - _vld4q_s8(a as *const i8, 1) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")] - fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t; - } - _vld4_s16(a as *const i8, 2) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")] - fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t; - } - _vld4q_s16(a as *const i8, 2) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")] - fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t; - } - _vld4_s32(a as *const i8, 4) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vld4))] -pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")] - fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t; - } - _vld4q_s32(a as *const i8, 4) -} -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] - fn _vld4_lane_f16( - ptr: *const f16, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i32, - size: i32, - ) -> float16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")] + fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t; } - _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3_dup_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg(target_arch = "arm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] - fn _vld4q_lane_f16( - ptr: *const f16, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i32, - size: i32, - ) -> float16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")] + fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t; } - _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3q_dup_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + link_name = "llvm.aarch64.neon.ld3r.v4f16.p0" )] - fn _vld4_lane_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i64, - ptr: *const f16, - ) -> float16x4x4_t; + fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t; } - _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_f16(a as _) } -#[doc = "Load multiple 4-element structures to two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + link_name = "llvm.aarch64.neon.ld3r.v8f16.p0" )] - fn _vld4q_lane_f16( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i64, - ptr: *const f16, - ) -> float16x8x4_t; + fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t; } - _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_f16(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0" + link_name = "llvm.aarch64.neon.ld3r.v2f32.p0" )] - fn _vld4_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i64, - ptr: *const i8, - ) -> float32x2x4_t; + fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t; } - _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_f32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0" + link_name = "llvm.aarch64.neon.ld3r.v4f32.p0" )] - fn _vld4q_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i64, - ptr: *const i8, - ) -> float32x4x4_t; + fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t; } - _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_f32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0" + link_name = "llvm.aarch64.neon.ld3r.v8i8.p0" )] - fn _vld4_lane_s8( - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - n: i64, - ptr: *const i8, - ) -> int8x8x4_t; + fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t; } - _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_s8(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0" + link_name = "llvm.aarch64.neon.ld3r.v16i8.p0" )] - fn _vld4_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i64, - ptr: *const i8, - ) -> int16x4x4_t; + fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t; } - _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_s8(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0" + link_name = "llvm.aarch64.neon.ld3r.v4i16.p0" )] - fn _vld4q_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i64, - ptr: *const i8, - ) -> int16x8x4_t; + fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t; } - _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_s16(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0" + link_name = "llvm.aarch64.neon.ld3r.v8i16.p0" )] - fn _vld4_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i64, - ptr: *const i8, - ) -> int32x2x4_t; + fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t; } - _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3q_dup_s16(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0" + link_name = "llvm.aarch64.neon.ld3r.v2i32.p0" )] - fn _vld4q_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i64, - ptr: *const i8, - ) -> int32x4x4_t; + fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t; } - _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vld3_dup_s32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")] - fn _vld4_lane_f32( - ptr: *const i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i32, - size: i32, - ) -> float32x2x4_t; - } - _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4i32.p0" + )] + fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t; + } + _vld3q_dup_s32(a as _) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v1i64.p0" + )] + fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t; + } + _vld3_dup_s64(a as _) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")] - fn _vld4q_lane_f32( - ptr: *const i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i32, - size: i32, - ) -> float32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")] + fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t; } - _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vld3_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")] - fn _vld4_lane_s8( - ptr: *const i8, - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - n: i32, - size: i32, - ) -> int8x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")] + fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t; } - _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) + _vld3q_dup_f32(a as *const i8, 4) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")] - fn _vld4_lane_s16( - ptr: *const i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i32, - size: i32, - ) -> int16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")] + fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t; } - _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3_dup_s8(a as *const i8, 1) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { - static_assert_uimm_bits!(LANE, 3); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")] - fn _vld4q_lane_s16( - ptr: *const i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i32, - size: i32, - ) -> int16x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")] + fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t; } - _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vld3q_dup_s8(a as *const i8, 1) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")] - fn _vld4_lane_s32( - ptr: *const i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i32, - size: i32, - ) -> int32x2x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")] + fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t; } - _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vld3_dup_s16(a as *const i8, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")] - fn _vld4q_lane_s32( - ptr: *const i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i32, - size: i32, - ) -> int32x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")] + fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t; } - _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vld3q_dup_s16(a as *const i8, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")] + fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + } + _vld3_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")] + fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + } + _vld3q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24923,23 +23959,37 @@ pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")] + fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t; + } + _vld3_dup_s64(a as *const i8, 8) +} +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24948,23 +23998,21 @@ pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uin target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24973,23 +24021,21 @@ pub unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -24998,23 +24044,21 @@ pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld4_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25023,23 +24067,21 @@ pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4q_lane_s32::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25048,23 +24090,21 @@ pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25073,23 +24113,21 @@ pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> pol target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_dup_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4, LANE = 0) + assert_instr(ld3r) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25098,21 +24136,20 @@ pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_dup_s32(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -25122,48 +24159,20 @@ pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { - transmute(vld4_s64(transmute(a))) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { - crate::ptr::read_unaligned(a.cast()) -} -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")] - fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t; - } - _vld4_s64(a as *const i8, 8) +pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -25173,20 +24182,20 @@ pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { - transmute(vld4_s64(transmute(a))) +pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -25196,20 +24205,20 @@ pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { - transmute(vld4_s8(transmute(a))) +pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) + assert_instr(ld3r) )] #[cfg_attr( not(target_arch = "arm"), @@ -25219,416 +24228,775 @@ pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { - transmute(vld4q_s8(transmute(a))) +pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { - transmute(vld4_s16(transmute(a))) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")] + fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t; + } + _vld3_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { - transmute(vld4q_s16(transmute(a))) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")] + fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t; + } + _vld3q_f16(a as _, 2) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld3) )] -pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { - transmute(vld4_s32(transmute(a))) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t { + crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "Load single 3-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(ld3) )] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { - transmute(vld4q_s32(transmute(a))) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t { + crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { - transmute(vld4_s8(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { - transmute(vld4q_s8(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { - transmute(vld4_s16(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a) } -#[doc = "Load multiple 4-element structures to four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ld4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { - transmute(vld4q_s16(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a) } -#[doc = "Store SIMD&FP register (immediate offset)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vldrq_p128(a: *const p128) -> p128 { - *a +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")] + fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t; + } + _vld3_f32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")] + fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t; + } + _vld3q_f32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")] + fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t; + } + _vld3_s8(a as *const i8, 1) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")] + fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t; + } + _vld3q_s8(a as *const i8, 1) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")] + fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t; + } + _vld3_s16(a as *const i8, 2) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")] + fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t; + } + _vld3q_s16(a as *const i8, 2) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")] + fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t; + } + _vld3_s32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")] + fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t; + } + _vld3q_s32(a as *const i8, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v4f16" - )] - fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")] + fn _vld3_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x3_t; } - unsafe { _vmax_f16(a, b) } + _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")] + fn _vld3q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x3_t; + } + _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { +pub unsafe fn vld3_lane_f16(a: *const f16, b: float16x4x3_t) -> float16x4x3_t { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v8f16" + link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0" )] - fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + fn _vld3_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x3_t; } - unsafe { _vmaxq_f16(a, b) } + _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld3, LANE = 0) )] -pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld3q_lane_f16(a: *const f16, b: float16x8x3_t) -> float16x8x3_t { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v2f32" + link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0" )] - fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + fn _vld3q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x3_t; } - unsafe { _vmax_f32(a, b) } + _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmax.v4f32" + link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0" )] - fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + fn _vld3_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x3_t; } - unsafe { _vmaxq_f32(a, b) } + _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { - let mask: int8x8_t = simd_ge(a, b); - simd_select(mask, a, b) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0" + )] + fn _vld3q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x3_t; } + _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")] + fn _vld3_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x3_t; + } + _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { - let mask: int8x16_t = simd_ge(a, b); - simd_select(mask, a, b) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0" + )] + fn _vld3_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x3_t; } + _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { - let mask: int16x4_t = simd_ge(a, b); - simd_select(mask, a, b) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0" + )] + fn _vld3_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x3_t; } + _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"] +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0" + )] + fn _vld3q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0" + )] + fn _vld3_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x3_t; + } + _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0" + )] + fn _vld3q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")] + fn _vld3_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x3_t; + } + _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")] + fn _vld3_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x3_t; + } + _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")] + fn _vld3q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")] + fn _vld3_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x3_t; + } + _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")] + fn _vld3q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25637,22 +25005,23 @@ pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { - let mask: int16x8_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25661,22 +25030,23 @@ pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { - let mask: int32x2_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25685,22 +25055,23 @@ pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { - let mask: int32x4_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25709,22 +25080,23 @@ pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let mask: uint8x8_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld3_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25733,22 +25105,23 @@ pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { - let mask: uint8x16_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3q_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25757,22 +25130,23 @@ pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { - let mask: uint16x4_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25781,22 +25155,23 @@ pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { - let mask: uint16x8_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(ld3, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -25805,21 +25180,21 @@ pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { - let mask: uint32x2_t = simd_ge(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Maximum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umax) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -25829,110 +25204,48 @@ pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { - let mask: uint32x4_t = simd_ge(a, b); - simd_select(mask, a, b) - } -} -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v4f16" - )] - fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vmaxnm_f16(a, b) } +pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_s64(transmute(a))) } -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v8f16" - )] - fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; - } - unsafe { _vmaxnmq_f16(a, b) } +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { + crate::ptr::read_unaligned(a.cast()) } -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v2f32" - )] - fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")] + fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t; } - unsafe { _vmaxnm_f32(a, b) } + _vld3_s64(a as *const i8, 8) } -#[doc = "Floating-point Maximum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxnm) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -25942,86 +25255,20 @@ pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxnm.v4f32" - )] - fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vmaxnmq_f32(a, b) } -} -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v4f16" - )] - fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vmin_f16(a, b) } -} -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v8f16" - )] - fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; - } - unsafe { _vminq_f16(a, b) } +pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_s64(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26031,26 +25278,20 @@ pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v2f32" - )] - fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vmin_f32(a, b) } +pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26060,26 +25301,20 @@ pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmin.v4f32" - )] - fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vminq_f32(a, b) } +pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26089,21 +25324,20 @@ pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { - let mask: int8x8_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_s16(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26113,21 +25347,20 @@ pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { - let mask: int8x16_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_s16(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26137,21 +25370,20 @@ pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { - let mask: int16x4_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_s32(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26161,21 +25393,20 @@ pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { - let mask: int16x8_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_s32(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26185,21 +25416,20 @@ pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { - let mask: int32x2_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26209,21 +25439,20 @@ pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { - let mask: int32x4_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_s8(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26233,21 +25462,20 @@ pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let mask: uint8x8_t = simd_le(a, b); - simd_select(mask, a, b) - } +pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_s16(transmute(a))) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"] +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) + assert_instr(ld3) )] #[cfg_attr( not(target_arch = "arm"), @@ -26257,177 +25485,426 @@ pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { - let mask: uint8x16_t = simd_le(a, b); - simd_select(mask, a, b) +pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_s16(transmute(a))) +} +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")] + fn _vld3q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x3_t; } + _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { - let mask: uint16x4_t = simd_le(a, b); - simd_select(mask, a, b) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")] + fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t; } + _vld4_dup_f16(a as _, 2) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { - let mask: uint16x8_t = simd_le(a, b); - simd_select(mask, a, b) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")] + fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t; } + _vld4q_dup_f16(a as _, 2) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4r) )] -pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { - let mask: uint32x2_t = simd_le(a, b); - simd_select(mask, a, b) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4f16.p0" + )] + fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t; } + _vld4_dup_f16(a as _) } -#[doc = "Minimum (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umin) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(ld4r) )] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { - let mask: uint32x4_t = simd_le(a, b); - simd_select(mask, a, b) +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8f16.p0" + )] + fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t; } + _vld4q_dup_f16(a as _) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")] + fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + } + _vld4_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")] + fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + } + _vld4q_dup_f32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")] + fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + } + _vld4_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")] + fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + } + _vld4q_dup_s8(a as *const i8, 1) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")] + fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + } + _vld4_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")] + fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + } + _vld4q_dup_s16(a as *const i8, 2) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")] + fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")] + fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as *const i8, 4) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v4f16" + link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0" )] - fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t; } - unsafe { _vminnm_f16(a, b) } + _vld4_dup_f32(a as _) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v8f16" + link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0" )] - fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t; } - unsafe { _vminnmq_f16(a, b) } + _vld4q_dup_f32(a as _) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0" + )] + fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t; + } + _vld4_dup_s8(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0" + )] + fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t; + } + _vld4q_dup_s8(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0" + )] + fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t; + } + _vld4_dup_s16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0" + )] + fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t; + } + _vld4q_dup_s16(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0" + )] + fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0" + )] + fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0" + )] + fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t; + } + _vld4_dup_s64(a as _) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26437,26 +25914,36 @@ pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) +} +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v2f32" - )] - fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")] + fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t; } - unsafe { _vminnm_f32(a, b) } + _vld4_dup_s64(a as *const i8, 8) } -#[doc = "Floating-point Minimum Number (vector)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminnm) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26466,26 +25953,20 @@ pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminnm.v4f32" - )] - fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - unsafe { _vminnmq_f32(a, b) } +pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) } -#[doc = "Floating-point multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26495,18 +25976,20 @@ pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) } -#[doc = "Floating-point multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(ld4r) )] #[cfg_attr( not(target_arch = "arm"), @@ -26516,20 +25999,21 @@ pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26538,25 +26022,21 @@ pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26565,25 +26045,21 @@ pub fn vmla_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26592,31 +26068,21 @@ pub fn vmla_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlaq_f32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_dup_s32(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26625,31 +26091,21 @@ pub fn vmlaq_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlaq_f32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_dup_s32(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26658,27 +26114,21 @@ pub fn vmlaq_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmla_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26687,27 +26137,21 @@ pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmla_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26716,27 +26160,21 @@ pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmla_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) + assert_instr(ld4r) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -26745,717 +26183,812 @@ pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmla_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlaq_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")] + fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t; } + _vld4_f16(a as _, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlaq_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")] + fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t; } + _vld4q_f16(a as _, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4) )] -pub fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlaq_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t { + crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"] +#[doc = "Load single 4-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4) )] -pub fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlaq_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t { + crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlaq_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlaq_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlaq_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { + crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla, LANE = 1) -)] -#[rustc_legacy_const_generics(3)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlaq_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vmla_f32(a, b, vdup_n_f32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")] + fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t; + } + _vld4_f32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vmlaq_f32(a, b, vdupq_n_f32(c)) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")] + fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t; + } + _vld4q_f32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { - vmla_s16(a, b, vdup_n_s16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")] + fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t; + } + _vld4_s8(a as *const i8, 1) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { - vmlaq_s16(a, b, vdupq_n_s16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")] + fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t; + } + _vld4q_s8(a as *const i8, 1) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { - vmla_u16(a, b, vdup_n_u16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")] + fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t; + } + _vld4_s16(a as *const i8, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { - vmlaq_u16(a, b, vdupq_n_u16(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")] + fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t; + } + _vld4q_s16(a as *const i8, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { - vmla_s32(a, b, vdup_n_s32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")] + fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t; + } + _vld4_s32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { - vmlaq_s32(a, b, vdupq_n_s32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")] + fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t; + } + _vld4q_s32(a as *const i8, 4) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { - vmla_u32(a, b, vdup_n_u32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")] + fn _vld4_lane_f16( + ptr: *const f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ) -> float16x4x4_t; + } + _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Vector multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { - vmlaq_u32(a, b, vdupq_n_u32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")] + fn _vld4q_lane_f16( + ptr: *const f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4, LANE = 0) )] -pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4_lane_f16(a: *const f16, b: float16x4x4_t) -> float16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0" + )] + fn _vld4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *const f16, + ) -> float16x4x4_t; + } + _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"] +#[doc = "Load multiple 4-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg(not(target_arch = "arm"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(ld4, LANE = 0) )] -pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vld4q_lane_f16(a: *const f16, b: float16x8x4_t) -> float16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0" + )] + fn _vld4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *const f16, + ) -> float16x8x4_t; + } + _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0" + )] + fn _vld4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x4_t; + } + _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0" + )] + fn _vld4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x4_t; + } + _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - unsafe { simd_add(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0" + )] + fn _vld4_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x4_t; + } + _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0" + )] + fn _vld4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x4_t; + } + _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0" + )] + fn _vld4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x4_t; + } + _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0" + )] + fn _vld4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x4_t; + } + _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0" + )] + fn _vld4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x4_t; + } + _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")] + fn _vld4_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x4_t; + } + _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")] + fn _vld4q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x4_t; + } + _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")] + fn _vld4_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x4_t; + } + _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")] + fn _vld4_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x4_t; + } + _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")] + fn _vld4q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x4_t; + } + _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")] + fn _vld4_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x4_t; + } + _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")] + fn _vld4q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x4_t; + } + _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27464,19 +26997,23 @@ pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27485,19 +27022,23 @@ pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld4_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27506,19 +27047,23 @@ pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27527,19 +27072,23 @@ pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld4_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27548,19 +27097,23 @@ pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld4q_lane_s32::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27569,19 +27122,23 @@ pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4_lane_s8::(transmute(a), transmute(b))) } -#[doc = "Multiply-add to accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mla) + assert_instr(ld4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27590,20 +27147,23 @@ pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe { simd_add(a, simd_mul(b, c)) } +pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld4_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(ld4, LANE = 0) )] -#[rustc_legacy_const_generics(3)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27612,27 +27172,22 @@ pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlal_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld4q_lane_s16::(transmute(a), transmute(b))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(nop) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27641,27 +27196,49 @@ pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlal_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) +pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_s64(transmute(a))) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + crate::ptr::read_unaligned(a.cast()) +} +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")] + fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t; } + _vld4_s64(a as *const i8, 8) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(nop) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27670,21 +27247,21 @@ pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_s64(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27693,21 +27270,21 @@ pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27716,27 +27293,21 @@ pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlal_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27745,27 +27316,21 @@ pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlal_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_s16(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27774,21 +27339,21 @@ pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_s16(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal, LANE = 1) + assert_instr(ld4) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -27797,19 +27362,20 @@ pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_s32(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27819,18 +27385,20 @@ pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vmlal_s16(a, b, vdup_n_s16(c)) +pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_s32(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27840,18 +27408,20 @@ pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vmlal_s32(a, b, vdup_n_s32(c)) +pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27861,18 +27431,20 @@ pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { - vmlal_u16(a, b, vdup_n_u16(c)) +pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_s8(transmute(a))) } -#[doc = "Vector widening multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27882,18 +27454,20 @@ pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { - vmlal_u32(a, b, vdup_n_u32(c)) +pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_s16(transmute(a))) } -#[doc = "Signed multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"] +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(ld4) )] #[cfg_attr( not(target_arch = "arm"), @@ -27903,18 +27477,20 @@ pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - unsafe { simd_add(a, vmull_s8(b, c)) } +pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_s16(transmute(a))) } -#[doc = "Signed multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"] +#[doc = "Store SIMD&FP register (immediate offset)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -27924,60 +27500,78 @@ pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - unsafe { simd_add(a, vmull_s16(b, c)) } +pub unsafe fn vldrq_p128(a: *const p128) -> p128 { + *a } -#[doc = "Signed multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlal) + assert_instr(fmax) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - unsafe { simd_add(a, vmull_s32(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f16" + )] + fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmax_f16(a, b) } } -#[doc = "Unsigned multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(fmax) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - unsafe { simd_add(a, vmull_u8(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v8f16" + )] + fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vmaxq_f16(a, b) } } -#[doc = "Unsigned multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(fmax) )] #[cfg_attr( not(target_arch = "arm"), @@ -27987,18 +27581,26 @@ pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - unsafe { simd_add(a, vmull_u16(b, c)) } +pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v2f32" + )] + fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmax_f32(a, b) } } -#[doc = "Unsigned multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlal) + assert_instr(fmax) )] #[cfg_attr( not(target_arch = "arm"), @@ -28008,18 +27610,26 @@ pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - unsafe { simd_add(a, vmull_u32(b, c)) } +pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v4f32" + )] + fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vmaxq_f32(a, b) } } -#[doc = "Floating-point multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smax) )] #[cfg_attr( not(target_arch = "arm"), @@ -28029,18 +27639,21 @@ pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { + let mask: int8x8_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Floating-point multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smax) )] #[cfg_attr( not(target_arch = "arm"), @@ -28050,20 +27663,22 @@ pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + let mask: int8x16_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28072,25 +27687,22 @@ pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let mask: int16x4_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28099,25 +27711,22 @@ pub fn vmls_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, -) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let mask: int16x8_t = simd_ge(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28126,31 +27735,22 @@ pub fn vmls_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe { - vmlsq_f32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: int32x2_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(smax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28159,31 +27759,22 @@ pub fn vmlsq_lane_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, -) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe { - vmlsq_f32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: int32x4_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28192,27 +27783,22 @@ pub fn vmlsq_laneq_f32( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe { - vmls_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: uint8x8_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28221,27 +27807,22 @@ pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { - vmls_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: uint8x16_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28250,27 +27831,22 @@ pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); +pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe { - vmls_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: uint16x4_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28279,27 +27855,22 @@ pub fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); +pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { - vmls_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: uint16x8_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28308,40 +27879,22 @@ pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe { - vmlsq_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) + let mask: uint32x2_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"] +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(umax) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28350,124 +27903,82 @@ pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { - vmlsq_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) + let mask: uint32x4_t = simd_ge(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlsq_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f16" + )] + fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; } + unsafe { _vmaxnm_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlsq_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v8f16" + )] + fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; } + unsafe { _vmaxnmq_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28476,21 +27987,27 @@ pub fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v2f32" + )] + fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vmaxnm_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"] +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmaxnm) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28499,67 +28016,87 @@ pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f32" + )] + fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vmaxnmq_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f16" + )] + fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vmin_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v8f16" + )] + fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vminq_f16(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28568,27 +28105,27 @@ pub fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlsq_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) +pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v2f32" + )] + fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } + unsafe { _vmin_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(fmin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28597,27 +28134,27 @@ pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlsq_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) +pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f32" + )] + fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } + unsafe { _vminq_f32(a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(smin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28626,27 +28163,22 @@ pub fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe { - vmlsq_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: int8x8_t = simd_le(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls, LANE = 1) + assert_instr(smin) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -28655,25 +28187,21 @@ pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { - vmlsq_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) + let mask: int8x16_t = simd_le(a, b); + simd_select(mask, a, b) } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28683,18 +28211,21 @@ pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vmls_f32(a, b, vdup_n_f32(c)) +pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { + let mask: int16x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28704,18 +28235,21 @@ pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vmlsq_f32(a, b, vdupq_n_f32(c)) +pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { + let mask: int16x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28725,18 +28259,21 @@ pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { - vmls_s16(a, b, vdup_n_s16(c)) +pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { + let mask: int32x2_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(smin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28746,18 +28283,21 @@ pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { - vmlsq_s16(a, b, vdupq_n_s16(c)) +pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { + let mask: int32x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28767,18 +28307,21 @@ pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { - vmls_u16(a, b, vdup_n_u16(c)) +pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + let mask: uint8x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28788,18 +28331,21 @@ pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { - vmlsq_u16(a, b, vdupq_n_u16(c)) +pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { + let mask: uint8x16_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28809,18 +28355,21 @@ pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { - vmls_s32(a, b, vdup_n_s32(c)) +pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { + let mask: uint16x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28830,18 +28379,21 @@ pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { - vmlsq_s32(a, b, vdupq_n_s32(c)) +pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { + let mask: uint16x8_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28851,18 +28403,21 @@ pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { - vmls_u32(a, b, vdup_n_u32(c)) +pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let mask: uint32x2_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Vector multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"] +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(umin) )] #[cfg_attr( not(target_arch = "arm"), @@ -28872,60 +28427,81 @@ pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { - vmlsq_u32(a, b, vdupq_n_u32(c)) +pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { + let mask: uint32x4_t = simd_le(a, b); + simd_select(mask, a, b) + } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f16" + )] + fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vminnm_f16(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v8f16" + )] + fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vminnmq_f16(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] #[cfg_attr( not(target_arch = "arm"), @@ -28935,18 +28511,26 @@ pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v2f32" + )] + fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vminnm_f32(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"] +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fminnm) )] #[cfg_attr( not(target_arch = "arm"), @@ -28956,18 +28540,26 @@ pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f32" + )] + fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vminnmq_f32(a, b) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"] +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -28977,18 +28569,18 @@ pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"] +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -28998,19 +28590,20 @@ pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29019,19 +28612,25 @@ pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmla_f32(a, b, vdup_lane_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29040,19 +28639,25 @@ pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_f32(a, b, vdup_laneq_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29061,19 +28666,25 @@ pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmlaq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_f32(a, b, vdupq_lane_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29082,19 +28693,25 @@ pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmlaq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_f32(a, b, vdupq_laneq_f32::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29103,19 +28720,21 @@ pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmla_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Multiply-subtract from accumulator"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mls) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29124,18 +28743,19 @@ pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - unsafe { simd_sub(a, simd_mul(b, c)) } +pub fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmla_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29146,25 +28766,19 @@ pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlsl_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmla_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29175,25 +28789,19 @@ pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { +pub fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlsl_s16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmla_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29204,19 +28812,19 @@ pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_s16(a, b, vdupq_lane_s16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29227,19 +28835,19 @@ pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { +pub fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } + vmlaq_u16(a, b, vdupq_lane_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29250,25 +28858,19 @@ pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlsl_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlaq_s16(a, b, vdupq_laneq_s16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29279,25 +28881,19 @@ pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { +pub fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlsl_u16( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmlaq_u16(a, b, vdupq_laneq_u16::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29308,19 +28904,19 @@ pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { +pub fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } + vmla_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl, LANE = 1) + assert_instr(mla, LANE = 1) )] #[rustc_legacy_const_generics(3)] #[cfg_attr( @@ -29331,20 +28927,21 @@ pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } +pub fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmla_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29353,19 +28950,21 @@ pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vmlsl_s16(a, b, vdup_n_s16(c)) +pub fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29374,19 +28973,21 @@ pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vmlsl_s32(a, b, vdup_n_s32(c)) +pub fn vmla_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29395,19 +28996,21 @@ pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { - vmlsl_u16(a, b, vdup_n_u16(c)) +pub fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_s32(a, b, vdupq_lane_s32::(c)) } -#[doc = "Vector widening multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29416,19 +29019,21 @@ pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { - vmlsl_u32(a, b, vdup_n_u32(c)) +pub fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_u32(a, b, vdupq_lane_u32::(c)) } -#[doc = "Signed multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29437,19 +29042,21 @@ pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - unsafe { simd_sub(a, vmull_s8(b, c)) } +pub fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_s32(a, b, vdupq_laneq_s32::(c)) } -#[doc = "Signed multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(mla, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29458,18 +29065,19 @@ pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - unsafe { simd_sub(a, vmull_s16(b, c)) } +pub fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_u32(a, b, vdupq_laneq_u32::(c)) } -#[doc = "Signed multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smlsl) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -29479,18 +29087,18 @@ pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - unsafe { simd_sub(a, vmull_s32(b, c)) } +pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmla_f32(a, b, vdup_n_f32(c)) } -#[doc = "Unsigned multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -29500,18 +29108,18 @@ pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - unsafe { simd_sub(a, vmull_u8(b, c)) } +pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlaq_f32(a, b, vdupq_n_f32(c)) } -#[doc = "Unsigned multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29521,18 +29129,18 @@ pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - unsafe { simd_sub(a, vmull_u16(b, c)) } +pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmla_s16(a, b, vdup_n_s16(c)) } -#[doc = "Unsigned multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umlsl) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29542,108 +29150,81 @@ pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - unsafe { simd_sub(a, vmull_u32(b, c)) } +pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlaq_s16(a, b, vdupq_n_s16(c)) } -#[doc = "8-bit integer matrix multiply-accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smmla) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")] - fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; - } - unsafe { _vmmlaq_s32(a, b, c) } +pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmla_u16(a, b, vdup_n_u16(c)) } -#[doc = "8-bit integer matrix multiply-accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ummla) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")] - fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; - } - unsafe { _vmmlaq_u32(a, b, c) } +pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlaq_u16(a, b, vdupq_n_u16(c)) } -#[doc = "Duplicate element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmov_n_f16(a: f16) -> float16x4_t { - vdup_n_f16(a) -} -#[doc = "Duplicate element to vector"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmovq_n_f16(a: f16) -> float16x8_t { - vdupq_n_f16(a) +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmla_s32(a, b, vdup_n_s32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29653,18 +29234,18 @@ pub fn vmovq_n_f16(a: f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_f32(value: f32) -> float32x2_t { - vdup_n_f32(value) +pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlaq_s32(a, b, vdupq_n_s32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29674,18 +29255,18 @@ pub fn vmov_n_f32(value: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_p16(value: p16) -> poly16x4_t { - vdup_n_p16(value) +pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmla_u32(a, b, vdup_n_u32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"] +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29695,18 +29276,18 @@ pub fn vmov_n_p16(value: p16) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_p8(value: p8) -> poly8x8_t { - vdup_n_p8(value) +pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlaq_u32(a, b, vdupq_n_u32(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29716,18 +29297,18 @@ pub fn vmov_n_p8(value: p8) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s16(value: i16) -> int16x4_t { - vdup_n_s16(value) +pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29737,18 +29318,18 @@ pub fn vmov_n_s16(value: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s32(value: i32) -> int32x2_t { - vdup_n_s32(value) +pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29758,18 +29339,18 @@ pub fn vmov_n_s32(value: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s64(value: i64) -> int64x1_t { - vdup_n_s64(value) +pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29779,18 +29360,18 @@ pub fn vmov_n_s64(value: i64) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_s8(value: i8) -> int8x8_t { - vdup_n_s8(value) +pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29800,18 +29381,18 @@ pub fn vmov_n_s8(value: i8) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u16(value: u16) -> uint16x4_t { - vdup_n_u16(value) +pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29821,18 +29402,18 @@ pub fn vmov_n_u16(value: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u32(value: u32) -> uint32x2_t { - vdup_n_u32(value) +pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmov) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29842,18 +29423,18 @@ pub fn vmov_n_u32(value: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u64(value: u64) -> uint64x1_t { - vdup_n_u64(value) +pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29863,18 +29444,18 @@ pub fn vmov_n_u64(value: u64) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmov_n_u8(value: u8) -> uint8x8_t { - vdup_n_u8(value) +pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29884,18 +29465,18 @@ pub fn vmov_n_u8(value: u8) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_f32(value: f32) -> float32x4_t { - vdupq_n_f32(value) +pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29905,18 +29486,18 @@ pub fn vmovq_n_f32(value: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_p16(value: p16) -> poly16x8_t { - vdupq_n_p16(value) +pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29926,18 +29507,18 @@ pub fn vmovq_n_p16(value: p16) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_p8(value: p8) -> poly8x16_t { - vdupq_n_p8(value) +pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"] +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(mla) )] #[cfg_attr( not(target_arch = "arm"), @@ -29947,19 +29528,20 @@ pub fn vmovq_n_p8(value: p8) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s16(value: i16) -> int16x8_t { - vdupq_n_s16(value) +pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_add(a, simd_mul(b, c)) } } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29968,19 +29550,21 @@ pub fn vmovq_n_s16(value: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s32(value: i32) -> int32x4_t { - vdupq_n_s32(value) +pub fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -29989,19 +29573,21 @@ pub fn vmovq_n_s32(value: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s64(value: i64) -> int64x2_t { - vdupq_n_s64(value) +pub fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlal_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30010,19 +29596,21 @@ pub fn vmovq_n_s64(value: i64) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_s8(value: i8) -> int8x16_t { - vdupq_n_s8(value) +pub fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlal_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(smlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30031,19 +29619,21 @@ pub fn vmovq_n_s8(value: i8) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u16(value: u16) -> uint16x8_t { - vdupq_n_u16(value) +pub fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30052,19 +29642,21 @@ pub fn vmovq_n_u16(value: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u32(value: u32) -> uint32x4_t { - vdupq_n_u32(value) +pub fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30073,19 +29665,21 @@ pub fn vmovq_n_u32(value: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u64(value: u64) -> uint64x2_t { - vdupq_n_u64(value) +pub fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlal_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Duplicate vector element to vector or scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(dup) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30094,19 +29688,21 @@ pub fn vmovq_n_u64(value: u64) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovq_n_u8(value: u8) -> uint8x16_t { - vdupq_n_u8(value) +pub fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlal_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sxtl) + assert_instr(umlal, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30115,18 +29711,19 @@ pub fn vmovq_n_u8(value: u8) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sxtl) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30136,18 +29733,18 @@ pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlal_s16(a, b, vdup_n_s16(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sxtl) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30157,18 +29754,18 @@ pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlal_s32(a, b, vdup_n_s32(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uxtl) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30178,18 +29775,18 @@ pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlal_u16(a, b, vdup_n_u16(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"] +#[doc = "Vector widening multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uxtl) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30199,18 +29796,18 @@ pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlal_u32(a, b, vdup_n_u32(c)) } -#[doc = "Vector long move."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"] +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uxtl) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30220,18 +29817,18 @@ pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + unsafe { simd_add(a, vmull_s8(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"] +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30241,18 +29838,18 @@ pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + unsafe { simd_add(a, vmull_s16(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"] +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(smlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30262,18 +29859,18 @@ pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + unsafe { simd_add(a, vmull_s32(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"] +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30283,18 +29880,18 @@ pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { - unsafe { simd_cast(a) } +pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + unsafe { simd_add(a, vmull_u8(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"] +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30304,18 +29901,18 @@ pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { - unsafe { simd_cast(a) } +pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + unsafe { simd_add(a, vmull_u16(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"] +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(umlal) )] #[cfg_attr( not(target_arch = "arm"), @@ -30325,18 +29922,18 @@ pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { - unsafe { simd_cast(a) } +pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + unsafe { simd_add(a, vmull_u32(b, c)) } } -#[doc = "Vector narrow integer."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"] +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(xtn) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -30346,63 +29943,68 @@ pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { - unsafe { simd_cast(a) } +pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(fmul, LANE = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_mul(a, b) } +pub fn vmls_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_f32(a, b, vdup_lane_f32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30411,19 +30013,25 @@ pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_mul(a, b) } +pub fn vmls_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_f32(a, b, vdup_laneq_f32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30432,91 +30040,75 @@ pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_f32(a, b, vdupq_lane_f32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { +pub fn vmlsq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmlsq_f32(a, b, vdupq_laneq_f32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { +pub fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!( - v, - v, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + vmls_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30525,21 +30117,21 @@ pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float1 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmls_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30548,21 +30140,21 @@ pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmls_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30571,26 +30163,21 @@ pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float3 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmls_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Floating-point multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul, LANE = 0) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30599,26 +30186,21 @@ pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float3 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +pub fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmlsq_s16(a, b, vdupq_lane_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30627,26 +30209,21 @@ pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +pub fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmlsq_u16(a, b, vdupq_lane_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30655,39 +30232,21 @@ pub fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } -} -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"] +pub fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlsq_s16(a, b, vdupq_laneq_s16::(c)) +} +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30696,21 +30255,21 @@ pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlsq_u16(a, b, vdupq_laneq_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30719,26 +30278,21 @@ pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { +pub fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmls_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30747,26 +30301,21 @@ pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30775,39 +30324,21 @@ pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { +pub fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + vmls_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30816,21 +30347,21 @@ pub fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30839,26 +30370,21 @@ pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { +pub fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmlsq_s32(a, b, vdupq_lane_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30867,26 +30393,21 @@ pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_u32(a, b, vdupq_lane_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30895,39 +30416,21 @@ pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - simd_mul( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } +pub fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_s32(a, b, vdupq_laneq_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30936,21 +30439,20 @@ pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { +pub fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + vmlsq_u32(a, b, vdupq_laneq_u32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(fmul) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30959,26 +30461,19 @@ pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmls_f32(a, b, vdup_n_f32(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(fmul) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -30987,26 +30482,19 @@ pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlsq_f32(a, b, vdupq_n_f32(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31015,39 +30503,19 @@ pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - simd_mul( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } +pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmls_s16(a, b, vdup_n_s16(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31056,21 +30524,19 @@ pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlsq_s16(a, b, vdupq_n_s16(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul, LANE = 1) + assert_instr(mls) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31079,54 +30545,18 @@ pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } -} -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { - unsafe { simd_mul(a, vdup_n_f16(b)) } -} -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) -)] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { - unsafe { simd_mul(a, vdupq_n_f16(b)) } +pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmls_u16(a, b, vdup_n_u16(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31136,18 +30566,18 @@ pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { - unsafe { simd_mul(a, vdup_n_f32(b)) } +pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlsq_u16(a, b, vdupq_n_u16(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31157,18 +30587,18 @@ pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { - unsafe { simd_mul(a, vdupq_n_f32(b)) } +pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmls_s32(a, b, vdup_n_s32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31178,18 +30608,18 @@ pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - unsafe { simd_mul(a, vdup_n_s16(b)) } +pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlsq_s32(a, b, vdupq_n_s32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31199,18 +30629,18 @@ pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - unsafe { simd_mul(a, vdupq_n_s16(b)) } +pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmls_u32(a, b, vdup_n_u32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"] +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31220,18 +30650,18 @@ pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - unsafe { simd_mul(a, vdup_n_s32(b)) } +pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlsq_u32(a, b, vdupq_n_u32(c)) } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31241,18 +30671,18 @@ pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - unsafe { simd_mul(a, vdupq_n_s32(b)) } +pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31262,18 +30692,18 @@ pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { - unsafe { simd_mul(a, vdup_n_u16(b)) } +pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31283,18 +30713,18 @@ pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { - unsafe { simd_mul(a, vdupq_n_u16(b)) } +pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31304,18 +30734,18 @@ pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { - unsafe { simd_mul(a, vdup_n_u32(b)) } +pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Vector multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31325,18 +30755,18 @@ pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { - unsafe { simd_mul(a, vdupq_n_u32(b)) } +pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Polynomial multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(pmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31346,26 +30776,18 @@ pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.pmul.v8i8" - )] - fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - } - unsafe { _vmul_p8(a, b) } +pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Polynomial multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(pmul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31375,26 +30797,18 @@ pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.pmul.v16i8" - )] - fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - } - unsafe { _vmulq_p8(a, b) } +pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31404,18 +30818,18 @@ pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31425,18 +30839,18 @@ pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_mul(a, b) } +pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31446,18 +30860,18 @@ pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31467,18 +30881,18 @@ pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_mul(a, b) } +pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"] +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(mls) )] #[cfg_attr( not(target_arch = "arm"), @@ -31488,19 +30902,20 @@ pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_mul(a, b) } +pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + unsafe { simd_sub(a, simd_mul(b, c)) } } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31509,19 +30924,21 @@ pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_s16(a, b, vdup_lane_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31530,19 +30947,21 @@ pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlsl_s16(a, b, vdup_laneq_s16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31551,19 +30970,21 @@ pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlsl_s32(a, b, vdup_lane_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(smlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31572,19 +30993,21 @@ pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_s32(a, b, vdup_laneq_s32::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(umlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31593,19 +31016,21 @@ pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_u16(a, b, vdup_lane_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(umlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31614,19 +31039,21 @@ pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlsl_u16(a, b, vdup_laneq_u16::(c)) } -#[doc = "Multiply"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mul) + assert_instr(umlsl, LANE = 1) )] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31635,20 +31062,21 @@ pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_mul(a, b) } +pub fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlsl_u32(a, b, vdup_lane_u32::(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(umlsl, LANE = 1) )] -#[rustc_legacy_const_generics(2)] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31657,26 +31085,20 @@ pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { +pub fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmull_s16( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + vmlsl_u32(a, b, vdup_laneq_u32::(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31685,26 +31107,19 @@ pub fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmull_s16( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlsl_s16(a, b, vdup_n_s16(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31713,21 +31128,19 @@ pub fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlsl_s32(a, b, vdup_n_s32(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull, LANE = 1) + assert_instr(umlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31736,21 +31149,19 @@ pub fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlsl_u16(a, b, vdup_n_u16(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"] +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(umlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31759,26 +31170,19 @@ pub fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - vmull_u16( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlsl_u32(a, b, vdup_n_u32(c)) } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"] +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31787,26 +31191,19 @@ pub fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { - vmull_u16( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } +pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + unsafe { simd_sub(a, vmull_s8(b, c)) } } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"] +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31815,21 +31212,19 @@ pub fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + unsafe { simd_sub(a, vmull_s16(b, c)) } } -#[doc = "Vector long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"] +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull, LANE = 1) + assert_instr(smlsl) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -31838,19 +31233,18 @@ pub fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } +pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + unsafe { simd_sub(a, vmull_s32(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"] +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(umlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31860,18 +31254,18 @@ pub fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vmull_s16(a, vdup_n_s16(b)) +pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + unsafe { simd_sub(a, vmull_u8(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"] +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(umlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31881,18 +31275,18 @@ pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { - vmull_s32(a, vdup_n_s32(b)) +pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + unsafe { simd_sub(a, vmull_u16(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"] +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(umlsl) )] #[cfg_attr( not(target_arch = "arm"), @@ -31902,89 +31296,108 @@ pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { - vmull_u16(a, vdup_n_u16(b)) +pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + unsafe { simd_sub(a, vmull_u32(b, c)) } } -#[doc = "Vector long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"] +#[doc = "8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(smmla) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { - vmull_u32(a, vdup_n_u32(b)) +pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")] + fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vmmlaq_s32(a, b, c) } } -#[doc = "Polynomial multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"] +#[doc = "8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(pmull) + assert_instr(ummla) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { +pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.pmull.v8i16" + link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")] - fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")] + fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; } - unsafe { _vmull_p8(a, b) } + unsafe { _vmmlaq_u32(a, b, c) } } -#[doc = "Signed multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"] +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + assert_instr(dup) )] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmov_n_f16(a: f16) -> float16x4_t { + vdup_n_f16(a) +} +#[doc = "Duplicate element to vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup) )] -pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmovq_n_f16(a: f16) -> float16x8_t { + vdupq_n_f16(a) } -#[doc = "Signed multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -31994,18 +31407,18 @@ pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_f32(value: f32) -> float32x2_t { + vdup_n_f32(value) } -#[doc = "Signed multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32015,18 +31428,18 @@ pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_p16(value: p16) -> poly16x4_t { + vdup_n_p16(value) } -#[doc = "Unsigned multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32036,18 +31449,18 @@ pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_p8(value: p8) -> poly8x8_t { + vdup_n_p8(value) } -#[doc = "Unsigned multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32057,18 +31470,18 @@ pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_s16(value: i16) -> int16x4_t { + vdup_n_s16(value) } -#[doc = "Unsigned multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umull) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32078,18 +31491,18 @@ pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - unsafe { simd_mul(simd_cast(a), simd_cast(b)) } +pub fn vmov_n_s32(value: i32) -> int32x2_t { + vdup_n_s32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(fmov) )] #[cfg_attr( not(target_arch = "arm"), @@ -32099,19 +31512,18 @@ pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { - let b = poly8x8_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmov_n_s64(value: i64) -> int64x1_t { + vdup_n_s64(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32121,19 +31533,18 @@ pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { - let b = int16x4_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmov_n_s8(value: i8) -> int8x8_t { + vdup_n_s8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32143,19 +31554,18 @@ pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { - let b = int32x2_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u16(value: u16) -> uint16x4_t { + vdup_n_u16(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32165,19 +31575,18 @@ pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { - let b = int8x8_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u32(value: u32) -> uint32x2_t { + vdup_n_u32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(fmov) )] #[cfg_attr( not(target_arch = "arm"), @@ -32187,19 +31596,18 @@ pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { - let b = uint16x4_t::splat(65_535); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u64(value: u64) -> uint64x1_t { + vdup_n_u64(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32209,19 +31617,18 @@ pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { - let b = uint32x2_t::splat(4_294_967_295); - unsafe { simd_xor(a, b) } +pub fn vmov_n_u8(value: u8) -> uint8x8_t { + vdup_n_u8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32231,19 +31638,18 @@ pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { - let b = uint8x8_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_f32(value: f32) -> float32x4_t { + vdupq_n_f32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32253,19 +31659,18 @@ pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { - let b = poly8x16_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_p16(value: p16) -> poly16x8_t { + vdupq_n_p16(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32275,19 +31680,18 @@ pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { - let b = int16x8_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_p8(value: p8) -> poly8x16_t { + vdupq_n_p8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32297,19 +31701,18 @@ pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { - let b = int32x4_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s16(value: i16) -> int16x8_t { + vdupq_n_s16(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32319,19 +31722,18 @@ pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { - let b = int8x16_t::splat(-1); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s32(value: i32) -> int32x4_t { + vdupq_n_s32(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32341,19 +31743,18 @@ pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { - let b = uint16x8_t::splat(65_535); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s64(value: i64) -> int64x2_t { + vdupq_n_s64(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32363,19 +31764,18 @@ pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { - let b = uint32x4_t::splat(4_294_967_295); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_s8(value: i8) -> int8x16_t { + vdupq_n_s8(value) } -#[doc = "Vector bitwise not."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(mvn) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32385,63 +31785,60 @@ pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { - let b = uint8x16_t::splat(255); - unsafe { simd_xor(a, b) } +pub fn vmovq_n_u16(value: u16) -> uint16x8_t { + vdupq_n_u16(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(dup) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vneg_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_neg(a) } +pub fn vmovq_n_u32(value: u32) -> uint32x4_t { + vdupq_n_u32(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(dup) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_neg(a) } +pub fn vmovq_n_u64(value: u64) -> uint64x2_t { + vdupq_n_u64(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] +#[doc = "Duplicate vector element to vector or scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(dup) )] #[cfg_attr( not(target_arch = "arm"), @@ -32451,18 +31848,18 @@ pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_neg(a) } +pub fn vmovq_n_u8(value: u8) -> uint8x16_t { + vdupq_n_u8(value) } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fneg) + assert_instr(sxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -32472,18 +31869,18 @@ pub fn vneg_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_neg(a) } +pub fn vmovl_s16(a: int16x4_t) -> int32x4_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(sxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -32493,18 +31890,18 @@ pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_neg(a) } +pub fn vmovl_s32(a: int32x2_t) -> int64x2_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(sxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -32514,18 +31911,18 @@ pub fn vneg_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_neg(a) } +pub fn vmovl_s8(a: int8x8_t) -> int16x8_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(uxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -32535,18 +31932,18 @@ pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_s16(a: int16x4_t) -> int16x4_t { - unsafe { simd_neg(a) } +pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(uxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -32556,18 +31953,18 @@ pub fn vneg_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { - unsafe { simd_neg(a) } +pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"] +#[doc = "Vector long move."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(uxtl) )] #[cfg_attr( not(target_arch = "arm"), @@ -32577,18 +31974,18 @@ pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vneg_s32(a: int32x2_t) -> int32x2_t { - unsafe { simd_neg(a) } +pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { + unsafe { simd_cast(a) } } -#[doc = "Negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(neg) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -32598,18 +31995,18 @@ pub fn vneg_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { - unsafe { simd_neg(a) } +pub fn vmovn_s16(a: int16x8_t) -> int8x8_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -32619,19 +32016,18 @@ pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - let c = int16x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_s32(a: int32x4_t) -> int16x4_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -32641,19 +32037,18 @@ pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - let c = int32x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_s64(a: int64x2_t) -> int32x2_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -32663,19 +32058,18 @@ pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - let c = int64x1_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -32685,19 +32079,18 @@ pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - let c = int8x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"] +#[doc = "Vector narrow integer."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(xtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -32707,63 +32100,62 @@ pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - let c = int16x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { + unsafe { simd_cast(a) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - let c = int32x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - let c = int64x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32773,19 +32165,18 @@ pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - let c = int8x16_t::splat(-1); - unsafe { simd_or(simd_xor(b, c), a) } +pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -32795,64 +32186,68 @@ pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - let c = int16x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - let c = int32x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmul_lane_f16(a: float16x4_t, v: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_lane_f16::(v)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - let c = int64x1_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmulq_lane_f16(a: float16x8_t, v: float16x4_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_lane_f16::(v)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32861,20 +32256,21 @@ pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - let c = int8x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdup_lane_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32883,20 +32279,21 @@ pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - let c = int16x8_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_laneq_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32905,20 +32302,21 @@ pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - let c = int32x4_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdupq_lane_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"] +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(fmul, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32927,20 +32325,21 @@ pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - let c = int64x2_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_laneq_f32::(b)) } } -#[doc = "Vector bitwise inclusive OR NOT"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orn) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32949,20 +32348,21 @@ pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - let c = int8x16_t::splat(-1); - unsafe { simd_or(simd_xor(b, transmute(c)), a) } +pub fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_lane_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32971,19 +32371,21 @@ pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_lane_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -32992,19 +32394,21 @@ pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_or(a, b) } +pub fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdup_lane_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33013,19 +32417,21 @@ pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdupq_lane_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33034,19 +32440,21 @@ pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_or(a, b) } +pub fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_lane_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33055,19 +32463,21 @@ pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_lane_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33076,19 +32486,21 @@ pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_or(a, b) } +pub fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdup_lane_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33097,19 +32509,21 @@ pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_or(a, b) } +pub fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_mul(a, vdupq_lane_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33118,19 +32532,21 @@ pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdup_laneq_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33139,19 +32555,21 @@ pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdupq_laneq_s16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33160,19 +32578,21 @@ pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_laneq_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33181,19 +32601,21 @@ pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_laneq_s32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33202,19 +32624,21 @@ pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdup_laneq_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33223,19 +32647,21 @@ pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_mul(a, vdupq_laneq_u16::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33244,19 +32670,21 @@ pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_or(a, b) } +pub fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdup_laneq_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(mul, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33265,18 +32693,49 @@ pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_or(a, b) } +pub fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_mul(a, vdupq_laneq_u32::(b)) } } -#[doc = "Vector bitwise or (immediate, inclusive)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t { + unsafe { simd_mul(a, vdup_n_f16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t { + unsafe { simd_mul(a, vdupq_n_f16(b)) } +} +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(orr) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33286,18 +32745,18 @@ pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_or(a, b) } +pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { + unsafe { simd_mul(a, vdup_n_f32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(fmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33307,27 +32766,18 @@ pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { - let x: int16x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_s8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_s8(b), a); - }; - x +pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { + unsafe { simd_mul(a, vdupq_n_f32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33337,27 +32787,18 @@ pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - let x: int16x8_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_s8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_s8(b), a); - }; - x +pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + unsafe { simd_mul(a, vdup_n_s16(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33367,27 +32808,18 @@ pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { - let x: int32x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_s16(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_s16(b), a); - }; - x +pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + unsafe { simd_mul(a, vdupq_n_s16(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33397,27 +32829,18 @@ pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - let x: int32x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_s16(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_s16(b), a); - }; - x +pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + unsafe { simd_mul(a, vdup_n_s32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33427,27 +32850,18 @@ pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { - let x: int64x1_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_s32(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_s32(b), a); - }; - x +pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + unsafe { simd_mul(a, vdupq_n_s32(b)) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33457,27 +32871,18 @@ pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - let x: int64x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_s32(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_s32(b), a); - }; - x +pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { + unsafe { simd_mul(a, vdup_n_u16(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33487,27 +32892,18 @@ pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { - let x: uint16x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_u8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_u8(b), a); - }; - x +pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { + unsafe { simd_mul(a, vdupq_n_u16(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33517,27 +32913,18 @@ pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - let x: uint16x8_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_u8(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_u8(b), a); - }; - x +pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { + unsafe { simd_mul(a, vdup_n_u32(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"] +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33547,27 +32934,18 @@ pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { - let x: uint32x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_u16(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_u16(b), a); - }; - x +pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { + unsafe { simd_mul(a, vdupq_n_u32(b)) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"] +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(pmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33577,27 +32955,26 @@ pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - let x: uint32x4_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_u16(a, b); +pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v8i8" + )] + fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_u16(b), a); - }; - x + unsafe { _vmul_p8(a, b) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"] +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(pmul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33607,27 +32984,26 @@ pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { - let x: uint64x1_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadal_u32(a, b); +pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v16i8" + )] + fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddl_u32(b), a); - }; - x + unsafe { _vmulq_p8(a, b) } } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uadalp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33637,57 +33013,39 @@ pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - let x: uint64x2_t; - #[cfg(target_arch = "arm")] - { - x = priv_vpadalq_u32(a, b); - } - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - unsafe { - x = simd_add(vpaddlq_u32(b), a); - }; - x +pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Floating-point add pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(faddp) + assert_instr(mul) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v4f16" - )] - fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vpadd_f16(a, b) } +pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Floating-point add pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(faddp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33697,26 +33055,18 @@ pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v2f32" - )] - fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vpadd_f32(a, b) } +pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33726,26 +33076,18 @@ pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] - fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vpadd_s8(a, b) } +pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33755,26 +33097,18 @@ pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] - fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vpadd_s16(a, b) } +pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33784,26 +33118,18 @@ pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] - fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vpadd_s32(a, b) } +pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33813,18 +33139,18 @@ pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) } +pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33834,18 +33160,18 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) } +pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_mul(a, b) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33855,18 +33181,18 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) } +pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33876,26 +33202,18 @@ pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] - fn _vpaddl_s8(a: int8x8_t) -> int16x4_t; - } - unsafe { _vpaddl_s8(a) } +pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33905,26 +33223,18 @@ pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] - fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t; - } - unsafe { _vpaddlq_s8(a) } +pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"] +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(mul) )] #[cfg_attr( not(target_arch = "arm"), @@ -33934,27 +33244,20 @@ pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] - fn _vpaddl_s16(a: int16x4_t) -> int32x2_t; - } - unsafe { _vpaddl_s16(a) } +pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_mul(a, b) } } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33963,27 +33266,21 @@ pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] - fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t; - } - unsafe { _vpaddlq_s16(a) } +pub fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmull_s16(a, vdup_lane_s16::(b)) } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -33992,27 +33289,21 @@ pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] - fn _vpaddl_s32(a: int32x2_t) -> int64x1_t; - } - unsafe { _vpaddl_s32(a) } +pub fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmull_s16(a, vdup_laneq_s16::(b)) } -#[doc = "Signed Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(saddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34021,27 +33312,21 @@ pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] - fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t; - } - unsafe { _vpaddlq_s32(a) } +pub fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmull_s32(a, vdup_lane_s32::(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(smull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34050,27 +33335,21 @@ pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] - fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t; - } - unsafe { _vpaddl_u8(a) } +pub fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmull_s32(a, vdup_laneq_s32::(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34079,27 +33358,21 @@ pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] - fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t; - } - unsafe { _vpaddlq_u8(a) } +pub fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmull_u16(a, vdup_lane_u16::(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34108,27 +33381,21 @@ pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] - fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t; - } - unsafe { _vpaddl_u16(a) } +pub fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmull_u16(a, vdup_laneq_u16::(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34137,27 +33404,21 @@ pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] - fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t; - } - unsafe { _vpaddlq_u16(a) } +pub fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmull_u32(a, vdup_lane_u32::(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"] +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(umull, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -34166,26 +33427,19 @@ pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] - fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t; - } - unsafe { _vpaddl_u32(a) } +pub fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmull_u32(a, vdup_laneq_u32::(b)) } -#[doc = "Unsigned Add and Accumulate Long Pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uaddlp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34195,26 +33449,18 @@ pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] - fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t; - } - unsafe { _vpaddlq_u32(a) } +pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vmull_s16(a, vdup_n_s16(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fmaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34224,26 +33470,18 @@ pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fmaxp.v2f32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] - fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vpmax_f32(a, b) } +pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vmull_s32(a, vdup_n_s32(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smaxp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34253,26 +33491,18 @@ pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] - fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vpmax_s8(a, b) } +pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { + vmull_u16(a, vdup_n_u16(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] +#[doc = "Vector long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smaxp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34282,26 +33512,18 @@ pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] - fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vpmax_s16(a, b) } +pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { + vmull_u32(a, vdup_n_u32(b)) } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] +#[doc = "Polynomial multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(smaxp) + assert_instr(pmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34311,26 +33533,26 @@ pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.smaxp.v2i32" + link_name = "llvm.aarch64.neon.pmull.v8i16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] - fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")] + fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; } - unsafe { _vpmax_s32(a, b) } + unsafe { _vmull_p8(a, b) } } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34340,26 +33562,18 @@ pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] - fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vpmax_u8(a, b) } +pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34369,26 +33583,18 @@ pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] - fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vpmax_u16(a, b) } +pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding maximum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(umaxp) + assert_instr(smull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34398,26 +33604,18 @@ pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.umaxp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] - fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vpmax_u32(a, b) } +pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fminp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34427,26 +33625,18 @@ pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.fminp.v2f32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] - fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vpmin_f32(a, b) } +pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sminp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34456,26 +33646,18 @@ pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] - fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vpmin_s8(a, b) } +pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sminp) + assert_instr(umull) )] #[cfg_attr( not(target_arch = "arm"), @@ -34485,26 +33667,18 @@ pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] - fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vpmin_s16(a, b) } +pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { simd_mul(simd_cast(a), simd_cast(b)) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34514,26 +33688,19 @@ pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sminp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] - fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vpmin_s32(a, b) } +pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { + let b = poly8x8_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34543,26 +33710,19 @@ pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] - fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vpmin_u8(a, b) } +pub fn vmvn_s16(a: int16x4_t) -> int16x4_t { + let b = int16x4_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34572,26 +33732,19 @@ pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] - fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vpmin_u16(a, b) } +pub fn vmvn_s32(a: int32x2_t) -> int32x2_t { + let b = int32x2_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Folding minimum of adjacent pairs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uminp) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34601,26 +33754,19 @@ pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uminp.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] - fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vpmin_u32(a, b) } +pub fn vmvn_s8(a: int8x8_t) -> int8x8_t { + let b = int8x8_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34630,26 +33776,19 @@ pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] - fn _vqabs_s8(a: int8x8_t) -> int8x8_t; - } - unsafe { _vqabs_s8(a) } +pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { + let b = uint16x4_t::splat(65_535); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34659,26 +33798,19 @@ pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] - fn _vqabsq_s8(a: int8x16_t) -> int8x16_t; - } - unsafe { _vqabsq_s8(a) } +pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { + let b = uint32x2_t::splat(4_294_967_295); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34688,26 +33820,19 @@ pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] - fn _vqabs_s16(a: int16x4_t) -> int16x4_t; - } - unsafe { _vqabs_s16(a) } +pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { + let b = uint8x8_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34717,26 +33842,19 @@ pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] - fn _vqabsq_s16(a: int16x8_t) -> int16x8_t; - } - unsafe { _vqabsq_s16(a) } +pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { + let b = poly8x16_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34746,26 +33864,19 @@ pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] - fn _vqabs_s32(a: int32x2_t) -> int32x2_t; - } - unsafe { _vqabs_s32(a) } +pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t { + let b = int16x8_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Signed saturating Absolute value"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqabs) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34775,26 +33886,19 @@ pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqabs.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] - fn _vqabsq_s32(a: int32x4_t) -> int32x4_t; - } - unsafe { _vqabsq_s32(a) } +pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t { + let b = int32x4_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34804,18 +33908,19 @@ pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t { + let b = int8x16_t::splat(-1); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34825,18 +33930,19 @@ pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { + let b = uint16x8_t::splat(65_535); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34846,18 +33952,19 @@ pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { + let b = uint32x4_t::splat(4_294_967_295); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"] +#[doc = "Vector bitwise not."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(mvn) )] #[cfg_attr( not(target_arch = "arm"), @@ -34867,60 +33974,63 @@ pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { + let b = uint8x16_t::splat(255); + unsafe { simd_xor(a, b) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_saturating_add(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vneg_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_saturating_add(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vnegq_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34930,18 +34040,18 @@ pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqadd) + assert_instr(fneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34951,18 +34061,18 @@ pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34972,18 +34082,18 @@ pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -34993,18 +34103,18 @@ pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -35014,18 +34124,18 @@ pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -35035,18 +34145,18 @@ pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -35056,18 +34166,18 @@ pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_saturating_add(a, b) } +pub fn vneg_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"] +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(neg) )] #[cfg_attr( not(target_arch = "arm"), @@ -35077,18 +34187,18 @@ pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_saturating_add(a, b) } +pub fn vnegq_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_neg(a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35098,18 +34208,19 @@ pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_saturating_add(a, b) } +pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Saturating add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqadd) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35119,20 +34230,20 @@ pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_saturating_add(a, b) } +pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal, N = 2) + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35141,21 +34252,20 @@ pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - vqaddq_s32(a, vqdmull_lane_s16::(b, c)) +pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal, N = 1) + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35164,19 +34274,19 @@ pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - vqaddq_s64(a, vqdmull_lane_s32::(b, c)) +pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35186,18 +34296,19 @@ pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vqaddq_s32(a, vqdmull_n_s16(b, c)) +pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35207,18 +34318,19 @@ pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vqaddq_s64(a, vqdmull_n_s32(b, c)) +pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Signed saturating doubling multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35228,18 +34340,19 @@ pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - vqaddq_s32(a, vqdmull_s16(b, c)) +pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Signed saturating doubling multiply-add long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlal) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35249,20 +34362,20 @@ pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - vqaddq_s64(a, vqdmull_s32(b, c)) +pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_or(simd_xor(b, c), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl, N = 2) + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35271,21 +34384,20 @@ pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - vqsubq_s32(a, vqdmull_lane_s16::(b, c)) +pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c = int16x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl, N = 1) + assert_instr(orn) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35294,19 +34406,19 @@ pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - vqsubq_s64(a, vqdmull_lane_s32::(b, c)) +pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c = int32x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35316,18 +34428,19 @@ pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vqsubq_s32(a, vqdmull_n_s16(b, c)) +pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + let c = int64x1_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector widening saturating doubling multiply subtract with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35337,18 +34450,19 @@ pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vqsubq_s64(a, vqdmull_n_s32(b, c)) +pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c = int8x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Signed saturating doubling multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35358,18 +34472,19 @@ pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - vqsubq_s32(a, vqdmull_s16(b, c)) +pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c = int16x8_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Signed saturating doubling multiply-subtract long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmlsl) + assert_instr(orn) )] #[cfg_attr( not(target_arch = "arm"), @@ -35379,20 +34494,20 @@ pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - vqsubq_s64(a, vqdmull_s32(b, c)) +pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c = int32x4_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orn) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35401,21 +34516,20 @@ pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) } +pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + let c = int64x2_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"] +#[doc = "Vector bitwise inclusive OR NOT"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orn) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35424,21 +34538,20 @@ pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) } +pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c = int8x16_t::splat(-1); + unsafe { simd_or(simd_xor(b, transmute(c)), a) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35447,21 +34560,19 @@ pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) } +pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh, LANE = 0) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35470,19 +34581,18 @@ pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) } +pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35492,19 +34602,18 @@ pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - let b: int16x4_t = vdup_n_s16(b); - vqdmulh_s16(a, b) +pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35514,19 +34623,18 @@ pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - let b: int16x8_t = vdupq_n_s16(b); - vqdmulhq_s16(a, b) +pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35536,19 +34644,18 @@ pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - let b: int32x2_t = vdup_n_s32(b); - vqdmulh_s32(a, b) +pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35558,19 +34665,18 @@ pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - let b: int32x4_t = vdupq_n_s32(b); - vqdmulhq_s32(a, b) +pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35580,26 +34686,18 @@ pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v4i16" - )] - fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vqdmulh_s16(a, b) } +pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35609,26 +34707,18 @@ pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v8i16" - )] - fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vqdmulhq_s16(a, b) } +pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35638,26 +34728,18 @@ pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v2i32" - )] - fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vqdmulh_s32(a, b) } +pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmulh) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35667,28 +34749,19 @@ pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmulh.v4i32" - )] - fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vqdmulhq_s32(a, b) } +pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull, N = 2) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35697,24 +34770,19 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - unsafe { - let b: int16x4_t = simd_shuffle!(b, b, [N as u32; 4]); - vqdmull_s16(a, b) - } +pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull, N = 1) + assert_instr(orr) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -35723,22 +34791,18 @@ pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [N as u32; 2]); - vqdmull_s32(a, b) - } +pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35748,18 +34812,18 @@ pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vqdmull_s16(a, vdup_n_s16(b)) +pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Vector saturating doubling long multiply with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35769,18 +34833,18 @@ pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { - vqdmull_s32(a, vdup_n_s32(b)) +pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35790,26 +34854,18 @@ pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmull.v4i32" - )] - fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - } - unsafe { _vqdmull_s16(a, b) } +pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating doubling multiply long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"] +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqdmull) + assert_instr(orr) )] #[cfg_attr( not(target_arch = "arm"), @@ -35819,26 +34875,18 @@ pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqdmull.v2i64" - )] - fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - } - unsafe { _vqdmull_s32(a, b) } +pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_or(a, b) } } -#[doc = "Signed saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35848,26 +34896,27 @@ pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtn.v8i8" - )] - fn _vqmovn_s16(a: int16x8_t) -> int8x8_t; +pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { + let x: int16x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s8(a, b); } - unsafe { _vqmovn_s16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s8(b), a); + }; + x } -#[doc = "Signed saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35877,26 +34926,27 @@ pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtn.v4i16" - )] - fn _vqmovn_s32(a: int32x4_t) -> int16x4_t; +pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let x: int16x8_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s8(a, b); } - unsafe { _vqmovn_s32(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s8(b), a); + }; + x } -#[doc = "Signed saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35906,26 +34956,27 @@ pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtn.v2i32" - )] - fn _vqmovn_s64(a: int64x2_t) -> int32x2_t; +pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + let x: int32x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s16(a, b); } - unsafe { _vqmovn_s64(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s16(b), a); + }; + x } -#[doc = "Unsigned saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35935,26 +34986,27 @@ pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqxtn.v8i8" - )] - fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t; +pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let x: int32x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s16(a, b); } - unsafe { _vqmovn_u16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s16(b), a); + }; + x } -#[doc = "Unsigned saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35964,26 +35016,27 @@ pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqxtn.v4i16" - )] - fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t; +pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + let x: int64x1_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_s32(a, b); } - unsafe { _vqmovn_u32(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_s32(b), a); + }; + x } -#[doc = "Unsigned saturating extract narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqxtn) + assert_instr(sadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -35993,26 +35046,27 @@ pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqxtn.v2i32" - )] - fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t; +pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let x: int64x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_s32(a, b); } - unsafe { _vqmovn_u64(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_s32(b), a); + }; + x } -#[doc = "Signed saturating extract unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtun) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36022,26 +35076,27 @@ pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtun.v8i8" - )] - fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t; +pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + let x: uint16x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u8(a, b); } - unsafe { _vqmovun_s16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u8(b), a); + }; + x } -#[doc = "Signed saturating extract unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtun) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36051,26 +35106,27 @@ pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtun.v4i16" - )] - fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t; +pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let x: uint16x8_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u8(a, b); } - unsafe { _vqmovun_s32(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u8(b), a); + }; + x } -#[doc = "Signed saturating extract unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqxtun) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36080,26 +35136,27 @@ pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqxtun.v2i32" - )] - fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t; +pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + let x: uint32x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u16(a, b); } - unsafe { _vqmovun_s64(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u16(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36109,26 +35166,27 @@ pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqneg_s8(a: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] - fn _vqneg_s8(a: int8x8_t) -> int8x8_t; +pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let x: uint32x4_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u16(a, b); } - unsafe { _vqneg_s8(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u16(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36138,26 +35196,27 @@ pub fn vqneg_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] - fn _vqnegq_s8(a: int8x16_t) -> int8x16_t; +pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + let x: uint64x1_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadal_u32(a, b); } - unsafe { _vqnegq_s8(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddl_u32(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(uadalp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36167,84 +35226,95 @@ pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqneg_s16(a: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] - fn _vqneg_s16(a: int16x4_t) -> int16x4_t; +pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let x: uint64x2_t; + #[cfg(target_arch = "arm")] + { + x = priv_vpadalq_u32(a, b); } - unsafe { _vqneg_s16(a) } + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + unsafe { + x = simd_add(vpaddlq_u32(b), a); + }; + x } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(faddp) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v8i16" + link_name = "llvm.aarch64.neon.faddp.v4f16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] - fn _vqnegq_s16(a: int16x8_t) -> int16x8_t; + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; } - unsafe { _vqnegq_s16(a) } + unsafe { _vpadd_f16(a, b) } } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(faddp) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqneg_s32(a: int32x2_t) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v2i32" + link_name = "llvm.aarch64.neon.faddp.v4f16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] - fn _vqneg_s32(a: int32x2_t) -> int32x2_t; + fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = _vpadd_f16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vqneg_s32(a) } } -#[doc = "Signed saturating negate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqneg) + assert_instr(faddp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36254,28 +35324,28 @@ pub fn vqneg_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { +pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqneg.v4i32" + link_name = "llvm.aarch64.neon.faddp.v2f32" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] - fn _vqnegq_s32(a: int32x4_t) -> int32x4_t; + fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqnegq_s32(a) } + unsafe { _vpadd_f32(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"] +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(faddp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36284,25 +35354,33 @@ pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v2f32" + )] + fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } unsafe { - let b: int16x4_t = - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulh_s16(a, b) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpadd_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36311,24 +35389,28 @@ pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); - vqrdmulh_s32(a, b) +pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] + fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } + unsafe { _vpadd_s8(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36337,25 +35419,33 @@ pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); +pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")] + fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } unsafe { - let b: int16x4_t = - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulh_s16(a, b) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = _vpadd_s8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36364,24 +35454,28 @@ pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); - vqrdmulh_s32(a, b) +pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] + fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } + unsafe { _vpadd_s16(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36390,37 +35484,33 @@ pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); +pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] + fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } unsafe { - let b: int16x8_t = simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ); - vqrdmulhq_s16(a, b) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = _vpadd_s16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36429,25 +35519,28 @@ pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { - let b: int32x4_t = - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s32(a, b) +pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] + fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vpadd_s32(a, b) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36456,37 +35549,32 @@ pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); +pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.addp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] + fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } unsafe { - let b: int16x8_t = simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ); - vqrdmulhq_s16(a, b) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = _vpadd_s32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector rounding saturating doubling multiply high by scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh, LANE = 1) + assert_instr(addp) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -36495,23 +35583,18 @@ pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let b: int32x4_t = - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s32(a, b) - } +pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(addp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36521,18 +35604,18 @@ pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - vqrdmulh_s16(a, vdup_n_s16(b)) +pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"] +#[doc = "Add pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(addp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36542,18 +35625,18 @@ pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - vqrdmulhq_s16(a, vdupq_n_s16(b)) +pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36563,18 +35646,26 @@ pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - vqrdmulh_s32(a, vdup_n_s32(b)) +pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] + fn _vpaddl_s8(a: int8x8_t) -> int16x4_t; + } + unsafe { _vpaddl_s8(a) } } -#[doc = "Vector saturating rounding doubling multiply high with scalar"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36584,18 +35675,26 @@ pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - vqrdmulhq_s32(a, vdupq_n_s32(b)) +pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] + fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t; + } + unsafe { _vpaddlq_s8(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36605,26 +35704,26 @@ pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v4i16" + link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" )] - fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] + fn _vpaddl_s16(a: int16x4_t) -> int32x2_t; } - unsafe { _vqrdmulh_s16(a, b) } + unsafe { _vpaddl_s16(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36634,26 +35733,26 @@ pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v8i16" + link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" )] - fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] + fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t; } - unsafe { _vqrdmulhq_s16(a, b) } + unsafe { _vpaddlq_s16(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36663,26 +35762,26 @@ pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v2i32" + link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" )] - fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] + fn _vpaddl_s32(a: int32x2_t) -> int64x1_t; } - unsafe { _vqrdmulh_s32(a, b) } + unsafe { _vpaddl_s32(a) } } -#[doc = "Signed saturating rounding doubling multiply returning high half"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"] +#[doc = "Signed Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrdmulh) + assert_instr(saddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36692,26 +35791,26 @@ pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrdmulh.v4i32" + link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" )] - fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] + fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t; } - unsafe { _vqrdmulhq_s32(a, b) } + unsafe { _vpaddlq_s32(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36721,26 +35820,26 @@ pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v8i8" + link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" )] - fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] + fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t; } - unsafe { _vqrshl_s8(a, b) } + unsafe { _vpaddl_u8(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36750,26 +35849,26 @@ pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v16i8" + link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" )] - fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] + fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t; } - unsafe { _vqrshlq_s8(a, b) } + unsafe { _vpaddlq_u8(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36779,26 +35878,26 @@ pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v4i16" + link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" )] - fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] + fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t; } - unsafe { _vqrshl_s16(a, b) } + unsafe { _vpaddl_u16(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36808,26 +35907,26 @@ pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v8i16" + link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" )] - fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] + fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t; } - unsafe { _vqrshlq_s16(a, b) } + unsafe { _vpaddlq_u16(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36837,26 +35936,26 @@ pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v2i32" + link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" )] - fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] + fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t; } - unsafe { _vqrshl_s32(a, b) } + unsafe { _vpaddl_u32(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"] +#[doc = "Unsigned Add and Accumulate Long Pairwise."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(uaddlp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36866,26 +35965,27 @@ pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v4i32" + link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" )] - fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] + fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t; } - unsafe { _vqrshlq_s32(a, b) } + unsafe { _vpaddlq_u32(a) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(fmaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36895,26 +35995,27 @@ pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v1i64" + link_name = "llvm.aarch64.neon.fmaxp.v2f32" )] - fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] + fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqrshl_s64(a, b) } + unsafe { _vpmax_f32(a, b) } } -#[doc = "Signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqrshl) + assert_instr(fmaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36924,26 +36025,32 @@ pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshl.v2i64" + link_name = "llvm.aarch64.neon.fmaxp.v2f32" )] - fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] + fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpmax_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshlq_s64(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36953,26 +36060,27 @@ pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { +pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v8i8" + link_name = "llvm.aarch64.neon.smaxp.v8i8" )] - fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } - unsafe { _vqrshl_u8(a, b) } + unsafe { _vpmax_s8(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -36982,26 +36090,32 @@ pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { +pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v16i8" + link_name = "llvm.aarch64.neon.smaxp.v8i8" )] - fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = _vpmax_s8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vqrshlq_u8(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -37011,26 +36125,27 @@ pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { +pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v4i16" + link_name = "llvm.aarch64.neon.smaxp.v4i16" )] - fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } - unsafe { _vqrshl_u16(a, b) } + unsafe { _vpmax_s16(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -37040,26 +36155,32 @@ pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { +pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v8i16" + link_name = "llvm.aarch64.neon.smaxp.v4i16" )] - fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = _vpmax_s16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vqrshlq_u16(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -37069,26 +36190,27 @@ pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { +pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v2i32" + link_name = "llvm.aarch64.neon.smaxp.v2i32" )] - fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } - unsafe { _vqrshl_u32(a, b) } + unsafe { _vpmax_s32(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(smaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -37098,26 +36220,32 @@ pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { +pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v4i32" + link_name = "llvm.aarch64.neon.smaxp.v2i32" )] - fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = _vpmax_s32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshlq_u32(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(umaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -37127,26 +36255,27 @@ pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { +pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v1i64" + link_name = "llvm.aarch64.neon.umaxp.v8i8" )] - fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } - unsafe { _vqrshl_u64(a, b) } + unsafe { _vpmax_u8(a, b) } } -#[doc = "Unsigned signed saturating rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqrshl) + assert_instr(umaxp) )] #[cfg_attr( not(target_arch = "arm"), @@ -37156,343 +36285,293 @@ pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { +pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshl.v2i64" + link_name = "llvm.aarch64.neon.umaxp.v8i8" )] - fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } - unsafe { _vqrshlq_u64(a, b) } -} -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] - fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = _vpmax_u8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] - fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; } - unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } + unsafe { _vpmax_u16(a, b) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] - fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umaxp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = _vpmax_u16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } - unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrn.v8i8" + link_name = "llvm.aarch64.neon.umaxp.v2i32" )] - fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; } - unsafe { _vqrshrn_n_s16(a, N) } + unsafe { _vpmax_u32(a, b) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[doc = "Folding maximum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umaxp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrn.v4i16" + link_name = "llvm.aarch64.neon.umaxp.v2i32" )] - fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = _vpmax_u32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshrn_n_s32(a, N) } } -#[doc = "Signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrn.v2i32" + link_name = "llvm.aarch64.neon.fminp.v2f32" )] - fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] + fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqrshrn_n_s64(a, N) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] - fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } - unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] - fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; - } - unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] - fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; - } - unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshrn.v8i8" - )] - fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; - } - unsafe { _vqrshrn_n_u16(a, N) } -} -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshrn.v4i16" - )] - fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; - } - unsafe { _vqrshrn_n_u32(a, N) } + unsafe { _vpmin_f32(a, b) } } -#[doc = "Unsigned signed saturating rounded shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqrshrn.v2i32" + link_name = "llvm.aarch64.neon.fminp.v2f32" )] - fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; - } - unsafe { _vqrshrn_n_u64(a, N) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] - fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } - unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] - fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] + fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } - unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] - fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = _vpmin_f32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) } - unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } } -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrun.v8i8" + link_name = "llvm.aarch64.neon.sminp.v8i8" )] - fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] + fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } - unsafe { _vqrshrun_n_s16(a, N) } + unsafe { _vpmin_s8(a, b) } } -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sminp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrun.v4i16" + link_name = "llvm.aarch64.neon.sminp.v8i8" )] - fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] + fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } - unsafe { _vqrshrun_n_s32(a, N) } -} -#[doc = "Signed saturating rounded shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqrshrun.v2i32" - )] - fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = _vpmin_s8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } - unsafe { _vqrshrun_n_s64(a, N) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37501,21 +36580,28 @@ pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - vqshl_s8(a, vdup_n_s8(N as _)) +pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] + fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vpmin_s16(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37524,21 +36610,33 @@ pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - vqshlq_s8(a, vdupq_n_s8(N as _)) +pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] + fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = _vpmin_s16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37547,21 +36645,28 @@ pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - vqshl_s16(a, vdup_n_s16(N as _)) +pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] + fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vpmin_s32(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(sminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37570,21 +36675,33 @@ pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - vqshlq_s16(a, vdupq_n_s16(N as _)) +pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] + fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = _vpmin_s32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37593,21 +36710,28 @@ pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 5); - vqshl_s32(a, vdup_n_s32(N as _)) +pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vpmin_u8(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37616,21 +36740,33 @@ pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 5); - vqshlq_s32(a, vdupq_n_s32(N as _)) +pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = _vpmin_u8(a, b); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37639,21 +36775,28 @@ pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { - static_assert_uimm_bits!(N, 6); - vqshl_s64(a, vdup_n_s64(N as _)) +pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vpmin_u16(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37662,21 +36805,33 @@ pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 6); - vqshlq_s64(a, vdupq_n_s64(N as _)) +pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = _vpmin_u16(a, b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37685,21 +36840,28 @@ pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - vqshl_u8(a, vdup_n_s8(N as _)) +pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] + fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vpmin_u32(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"] +#[doc = "Folding minimum of adjacent pairs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(uminp) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37708,21 +36870,32 @@ pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - vqshlq_u8(a, vdupq_n_s8(N as _)) +pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uminp.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] + fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = _vpmin_u32(a, b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37731,21 +36904,27 @@ pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - vqshl_u16(a, vdup_n_s16(N as _)) +pub fn vqabs_s8(a: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] + fn _vqabs_s8(a: int8x8_t) -> int8x8_t; + } + unsafe { _vqabs_s8(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37754,21 +36933,27 @@ pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - vqshlq_u16(a, vdupq_n_s16(N as _)) +pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] + fn _vqabsq_s8(a: int8x16_t) -> int8x16_t; + } + unsafe { _vqabsq_s8(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37777,21 +36962,27 @@ pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - vqshl_u32(a, vdup_n_s32(N as _)) +pub fn vqabs_s16(a: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] + fn _vqabs_s16(a: int16x4_t) -> int16x4_t; + } + unsafe { _vqabs_s16(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37800,21 +36991,27 @@ pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - vqshlq_u32(a, vdupq_n_s32(N as _)) +pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] + fn _vqabsq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vqabsq_s16(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37823,21 +37020,27 @@ pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - vqshl_u64(a, vdup_n_s64(N as _)) +pub fn vqabs_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] + fn _vqabs_s32(a: int32x2_t) -> int32x2_t; + } + unsafe { _vqabs_s32(a) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"] +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl, N = 2) + assert_instr(sqabs) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -37846,19 +37049,26 @@ pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - vqshlq_u64(a, vdupq_n_s64(N as _)) +pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] + fn _vqabsq_s32(a: int32x4_t) -> int32x4_t; + } + unsafe { _vqabsq_s32(a) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37868,26 +37078,18 @@ pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v8i8" - )] - fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vqshl_s8(a, b) } +pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37897,26 +37099,18 @@ pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v16i8" - )] - fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vqshlq_s8(a, b) } +pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37926,26 +37120,18 @@ pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v4i16" - )] - fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vqshl_s16(a, b) } +pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37955,26 +37141,18 @@ pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v8i16" - )] - fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vqshlq_s16(a, b) } +pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -37984,26 +37162,18 @@ pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v2i32" - )] - fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vqshl_s32(a, b) } +pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38013,26 +37183,18 @@ pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v4i32" - )] - fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vqshlq_s32(a, b) } +pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38042,26 +37204,18 @@ pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v1i64" - )] - fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } - unsafe { _vqshl_s64(a, b) } +pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqshl) + assert_instr(sqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38071,26 +37225,18 @@ pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshl.v2i64" - )] - fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } - unsafe { _vqshlq_s64(a, b) } +pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38100,26 +37246,18 @@ pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v8i8" - )] - fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vqshl_u8(a, b) } +pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38129,26 +37267,18 @@ pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v16i8" - )] - fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vqshlq_u8(a, b) } +pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38158,26 +37288,18 @@ pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v4i16" - )] - fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vqshl_u16(a, b) } +pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38187,26 +37309,18 @@ pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v8i16" - )] - fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vqshlq_u16(a, b) } +pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38216,26 +37330,18 @@ pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v2i32" - )] - fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vqshl_u32(a, b) } +pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38245,26 +37351,18 @@ pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v4i32" - )] - fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vqshlq_u32(a, b) } +pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38274,26 +37372,18 @@ pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v1i64" - )] - fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vqshl_u64(a, b) } +pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Unsigned saturating shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"] +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqshl) + assert_instr(uqadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -38303,621 +37393,408 @@ pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshl.v2i64" - )] - fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vqshlq_u64(a, b) } +pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_saturating_add(a, b) } } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] - fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; - } - unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlal, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqaddq_s32(a, vqdmull_lane_s16::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] - fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; - } - unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlal, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqaddq_s64(a, vqdmull_lane_s32::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] - fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; - } - unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqaddq_s32(a, vqdmull_n_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] - fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; - } - unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_n_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] - fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; - } - unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqaddq_s32(a, vqdmull_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] - fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; - } - unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqaddq_s64(a, vqdmull_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] - fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; - } - unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlsl, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqsubq_s32(a, vqdmull_lane_s16::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] - fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; - } - unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sqdmlsl, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqsubq_s64(a, vqdmull_lane_s32::(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v8i8" - )] - fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; - } - unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_n_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v16i8" - )] - fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; - } - unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqsubq_s64(a, vqdmull_n_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v4i16" - )] - fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; - } - unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqsubq_s32(a, vqdmull_s16(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v8i16" - )] - fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; - } - unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqsubq_s64(a, vqdmull_s32(b, c)) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v2i32" - )] - fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; - } - unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::(b))) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v4i32" - )] - fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; - } - unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::(b))) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v1i64" - )] - fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; - } - unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::(b))) } -#[doc = "Signed saturating shift left unsigned"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshlu.v2i64" - )] - fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; - } - unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::(b))) } -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] - fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } - unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + let b: int16x4_t = vdup_n_s16(b); + vqdmulh_s16(a, b) } -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] - fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; - } - unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] - fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; - } - unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrn.v8i8" - )] - fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; - } - unsafe { _vqshrn_n_s16(a, N) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrn.v4i16" - )] - fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; - } - unsafe { _vqshrn_n_s32(a, N) } -} -#[doc = "Signed saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrn.v2i32" - )] - fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; - } - unsafe { _vqshrn_n_s64(a, N) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] - fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } - unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] - fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; - } - unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] - fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; - } - unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshrn.v8i8" - )] - fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; - } - unsafe { _vqshrn_n_u16(a, N) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshrn.v4i16" - )] - fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; - } - unsafe { _vqshrn_n_u32(a, N) } -} -#[doc = "Unsigned saturating shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.uqshrn.v2i32" - )] - fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; - } - unsafe { _vqshrn_n_u64(a, N) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] - fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } - unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] - fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; - } - unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] - fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; - } - unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrun.v8i8" - )] - fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; - } - unsafe { _vqshrun_n_s16(a, N) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrun.v4i16" - )] - fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; - } - unsafe { _vqshrun_n_s32(a, N) } -} -#[doc = "Signed saturating shift right unsigned narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sqshrun.v2i32" - )] - fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; - } - unsafe { _vqshrun_n_s64(a, N) } -} -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38927,18 +37804,19 @@ pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + let b: int16x8_t = vdupq_n_s16(b); + vqdmulhq_s16(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38948,18 +37826,19 @@ pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + let b: int32x2_t = vdup_n_s32(b); + vqdmulh_s32(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"] +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38969,18 +37848,19 @@ pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + let b: int32x4_t = vdupq_n_s32(b); + vqdmulhq_s32(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -38990,18 +37870,26 @@ pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i16" + )] + fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vqdmulh_s16(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39011,18 +37899,26 @@ pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v8i16" + )] + fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqdmulhq_s16(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39032,18 +37928,26 @@ pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v2i32" + )] + fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vqdmulh_s32(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"] +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39053,19 +37957,28 @@ pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i32" + )] + fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqdmulhq_s32(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"] +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sqsub) + assert_instr(sqdmull, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39074,19 +37987,22 @@ pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + let b = vdup_lane_s16::(b); + vqdmull_s16(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"] +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull, N = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39095,18 +38011,20 @@ pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + let b = vdup_lane_s32::(b); + vqdmull_s32(a, b) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"] +#[doc = "Vector saturating doubling long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -39116,18 +38034,18 @@ pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vqdmull_s16(a, vdup_n_s16(b)) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"] +#[doc = "Vector saturating doubling long multiply with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -39137,18 +38055,18 @@ pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vqdmull_s32(a, vdup_n_s32(b)) } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"] +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -39158,18 +38076,26 @@ pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v4i32" + )] + fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; + } + unsafe { _vqdmull_s16(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"] +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqdmull) )] #[cfg_attr( not(target_arch = "arm"), @@ -39179,18 +38105,26 @@ pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v2i64" + )] + fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; + } + unsafe { _vqdmull_s32(a, b) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"] +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -39200,18 +38134,26 @@ pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v8i8" + )] + fn _vqmovn_s16(a: int16x8_t) -> int8x8_t; + } + unsafe { _vqmovn_s16(a) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"] +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -39221,18 +38163,26 @@ pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v4i16" + )] + fn _vqmovn_s32(a: int32x4_t) -> int16x4_t; + } + unsafe { _vqmovn_s32(a) } } -#[doc = "Saturating subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"] +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uqsub) + assert_instr(sqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -39242,18 +38192,26 @@ pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_saturating_sub(a, b) } +pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v2i32" + )] + fn _vqmovn_s64(a: int64x2_t) -> int32x2_t; + } + unsafe { _vqmovn_s64(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"] +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn2) + assert_instr(uqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -39263,19 +38221,26 @@ pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let x = vraddhn_s16(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v8i8" + )] + fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t; + } + unsafe { _vqmovn_u16(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"] +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn2) + assert_instr(uqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -39285,19 +38250,26 @@ pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let x = vraddhn_s32(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v4i16" + )] + fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t; + } + unsafe { _vqmovn_u32(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"] +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn2) + assert_instr(uqxtn) )] #[cfg_attr( not(target_arch = "arm"), @@ -39307,19 +38279,26 @@ pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let x = vraddhn_s64(b, c); - unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) } +pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v2i32" + )] + fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqmovn_u64(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"] +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn2) + assert_instr(sqxtun) )] #[cfg_attr( not(target_arch = "arm"), @@ -39329,21 +38308,26 @@ pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - unsafe { - let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c))); - simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v8i8" + )] + fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t; } + unsafe { _vqmovun_s16(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"] +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn2) + assert_instr(sqxtun) )] #[cfg_attr( not(target_arch = "arm"), @@ -39353,21 +38337,26 @@ pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - unsafe { - let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c))); - simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) +pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v4i16" + )] + fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t; } + unsafe { _vqmovun_s32(a) } } -#[doc = "Rounding Add returning High Narrow (high half)."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"] +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn2) + assert_instr(sqxtun) )] #[cfg_attr( not(target_arch = "arm"), @@ -39377,21 +38366,26 @@ pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - unsafe { - let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c))); - simd_shuffle!(a, x, [0, 1, 2, 3]) +pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v2i32" + )] + fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t; } + unsafe { _vqmovun_s64(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -39401,26 +38395,26 @@ pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4 target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { +pub fn vqneg_s8(a: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.raddhn.v8i8" + link_name = "llvm.aarch64.neon.sqneg.v8i8" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")] - fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + fn _vqneg_s8(a: int8x8_t) -> int8x8_t; } - unsafe { _vraddhn_s16(a, b) } + unsafe { _vqneg_s8(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -39430,26 +38424,26 @@ pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { +pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.raddhn.v4i16" + link_name = "llvm.aarch64.neon.sqneg.v16i8" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")] - fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] + fn _vqnegq_s8(a: int8x16_t) -> int8x16_t; } - unsafe { _vraddhn_s32(a, b) } + unsafe { _vqnegq_s8(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -39459,27 +38453,26 @@ pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { +pub fn vqneg_s16(a: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.raddhn.v2i32" + link_name = "llvm.aarch64.neon.sqneg.v4i16" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")] - fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + fn _vqneg_s16(a: int16x4_t) -> int16x4_t; } - unsafe { _vraddhn_s64(a, b) } + unsafe { _vqneg_s16(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -39489,19 +38482,26 @@ pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) } +pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + fn _vqnegq_s16(a: int16x8_t) -> int16x8_t; + } + unsafe { _vqnegq_s16(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -39511,24 +38511,26 @@ pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqneg_s32(a: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] + fn _vqneg_s32(a: int32x2_t) -> int32x2_t; } + unsafe { _vqneg_s32(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqneg) )] #[cfg_attr( not(target_arch = "arm"), @@ -39538,20 +38540,28 @@ pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) } +pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] + fn _vqnegq_s32(a: int32x4_t) -> int32x4_t; + } + unsafe { _vqnegq_s32(a) } } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39560,25 +38570,22 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdup_lane_s16::(b); + vqrdmulh_s16(a, b) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39587,20 +38594,22 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) } +pub fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let b = vdup_lane_s32::(b); + vqrdmulh_s32(a, b) } -#[doc = "Rounding Add returning High Narrow."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(raddhn) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39609,84 +38618,70 @@ pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); - let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + let b = vdup_laneq_s16::(b); + vqrdmulh_s16(a, b) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v4f16" - )] - fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; - } - unsafe { _vrecpe_f16(a) } +pub fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdup_laneq_s32::(b); + vqrdmulh_s32(a, b) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v8f16" - )] - fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t; - } - unsafe { _vrecpeq_f16(a) } +pub fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdupq_lane_s16::(b); + vqrdmulhq_s16(a, b) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39695,27 +38690,22 @@ pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v2f32" - )] - fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrecpe_f32(a) } +pub fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let b = vdupq_lane_s32::(b); + vqrdmulhq_s32(a, b) } -#[doc = "Reciprocal estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39724,27 +38714,22 @@ pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecpe.v4f32" - )] - fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrecpeq_f32(a) } +pub fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + let b = vdupq_laneq_s16::(b); + vqrdmulhq_s16(a, b) } -#[doc = "Unsigned reciprocal estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urecpe) + assert_instr(sqrdmulh, LANE = 1) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -39753,26 +38738,20 @@ pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urecpe.v2i32" - )] - fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t; - } - unsafe { _vrecpe_u32(a) } +pub fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let b = vdupq_laneq_s32::(b); + vqrdmulhq_s32(a, b) } -#[doc = "Unsigned reciprocal estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urecpe) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39782,86 +38761,60 @@ pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urecpe.v4i32" - )] - fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t; - } - unsafe { _vrecpeq_u32(a) } +pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + vqrdmulh_s16(a, vdup_n_s16(b)) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v4f16" - )] - fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; - } - unsafe { _vrecps_f16(a, b) } +pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + vqrdmulhq_s16(a, vdupq_n_s16(b)) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v8f16" - )] - fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; - } - unsafe { _vrecpsq_f16(a, b) } +pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + vqrdmulh_s32(a, vdup_n_s32(b)) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39871,26 +38824,18 @@ pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v2f32" - )] - fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - unsafe { _vrecps_f32(a, b) } +pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + vqrdmulhq_s32(a, vdupq_n_s32(b)) } -#[doc = "Floating-point reciprocal step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frecps) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), @@ -39900,2337 +38845,2968 @@ pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frecps.v4f32" + link_name = "llvm.aarch64.neon.sqrdmulh.v4i16" )] - fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } - unsafe { _vrecpsq_f32(a, b) } + unsafe { _vqrdmulh_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { - unsafe { transmute(a) } +pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v8i16" + )] + fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqrdmulhq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v2i32" + )] + fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vqrdmulh_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrdmulh) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i32" + )] + fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqrdmulhq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i8" + )] + fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } + unsafe { _vqrshl_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v16i8" + )] + fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqrshlq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i16" + )] + fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } + unsafe { _vqrshl_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i16" + )] + fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqrshlq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i32" + )] + fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vqrshl_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i32" + )] + fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqrshlq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v1i64" + )] + fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } + unsafe { _vqrshl_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i64" + )] + fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqrshlq_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i8" + )] + fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; } + unsafe { _vqrshl_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v16i8" + )] + fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vqrshlq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i16" + )] + fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } + unsafe { _vqrshl_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i16" + )] + fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vqrshlq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i32" + )] + fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; } + unsafe { _vqrshl_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i32" + )] + fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vqrshlq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v1i64" + )] + fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; } + unsafe { _vqrshl_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqrshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i64" + )] + fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vqrshlq_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] + fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] + fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] + fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v8i8" + )] + fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vqrshrn_n_s16(a, N) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v4i16" + )] + fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vqrshrn_n_s32(a, N) } +} +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrn.v2i32" + )] + fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vqrshrn_n_s64(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] + fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } + unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] + fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } + unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] + fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v8i8" + )] + fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqrshrn_n_u16(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v4i16" + )] + fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqrshrn_n_u32(a, N) } +} +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v2i32" + )] + fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqrshrn_n_u64(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] + fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } + unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] + fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } + unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] + fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } + unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v8i8" + )] + fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqrshrun_n_s16(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v4i16" + )] + fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqrshrun_n_s32(a, N) } +} +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v2i32" + )] + fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqrshrun_n_s64(a, N) } +} +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_s8(a, vdup_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + vqshlq_s8(a, vdupq_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + vqshl_s16(a, vdup_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + vqshlq_s16(a, vdupq_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + vqshl_s32(a, vdup_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + vqshlq_s32(a, vdupq_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_uimm_bits!(N, 6); + vqshl_s64(a, vdup_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + vqshlq_s64(a, vdupq_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_u8(a, vdup_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + vqshlq_u8(a, vdupq_n_s8(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + vqshl_u16(a, vdup_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + vqshlq_u16(a, vdupq_n_s16(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + vqshl_u32(a, vdup_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + vqshlq_u32(a, vdupq_n_s32(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + vqshl_u64(a, vdup_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + vqshlq_u64(a, vdupq_n_s64(N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i8" + )] + fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } + unsafe { _vqshl_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v16i8" + )] + fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vqshlq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i16" + )] + fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } + unsafe { _vqshl_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i16" + )] + fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vqshlq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v2i32" + )] + fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } + unsafe { _vqshl_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i32" + )] + fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vqshlq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v1i64" + )] + fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } + unsafe { _vqshl_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v2i64" + )] + fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vqshlq_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v8i8" + )] + fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; } + unsafe { _vqshl_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v16i8" + )] + fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vqshlq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v4i16" + )] + fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; } + unsafe { _vqshl_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v8i16" + )] + fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vqshlq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v2i32" + )] + fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; } + unsafe { _vqshl_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v4i32" + )] + fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vqshlq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v1i64" + )] + fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; } + unsafe { _vqshl_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "Unsigned saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqshl) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshl.v2i64" + )] + fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshlq_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] + fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; } + unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] + fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } + unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] + fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; } + unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] + fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } + unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } + unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; } + unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v8i8" + )] + fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } + unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v16i8" + )] + fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; } + unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i16" + )] + fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + } + unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v8i16" + )] + fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; } + unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i32" + )] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i32" + )] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; } + unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v1i64" + )] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t; + } + unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) } +} +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i64" + )] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; + } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] + fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] + fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] + fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v8i8" + )] + fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vqshrn_n_s16(a, N) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v4i16" + )] + fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vqshrn_n_s32(a, N) } +} +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v2i32" + )] + fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vqshrn_n_s64(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] + fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } + unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] + fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } + unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] + fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + } + unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v8i8" + )] + fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqshrn_n_u16(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v4i16" + )] + fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqshrn_n_u32(a, N) } +} +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v2i32" + )] + fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqshrn_n_u64(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] + fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } + unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] + fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } + unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] + fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } + unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v8i8" + )] + fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t; + } + unsafe { _vqshrun_n_s16(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v4i16" + )] + fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t; + } + unsafe { _vqshrun_n_s32(a, N) } +} +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v2i32" + )] + fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t; + } + unsafe { _vqshrun_n_s64(a, N) } +} +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(sqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(uqsub) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_saturating_sub(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let x = vraddhn_s16(b, c); + vcombine_s8(a, x) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let x = vraddhn_s32(b, c); + vcombine_s16(a, x) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let x = vraddhn_s64(b, c); + vcombine_s32(a, x) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { +pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c))); + vcombine_u8(a, x) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + unsafe { + let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c))); + vcombine_u16(a, x) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] +#[doc = "Rounding Add returning High Narrow (high half)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(raddhn2) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { +pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c))); + vcombine_u32(a, x) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")] + fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + } + unsafe { _vraddhn_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")] + fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; } + unsafe { _vraddhn_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { - unsafe { transmute(a) } +pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.raddhn.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")] + fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; + } + unsafe { _vraddhn_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { - unsafe { - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] +#[doc = "Rounding Add returning High Narrow."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(raddhn) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { - unsafe { - let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -42240,20 +41816,27 @@ pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f16" + )] + fn _vrecpe_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrecpe_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -42263,122 +41846,143 @@ pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) +pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v8f16" + )] + fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t; } + unsafe { _vrecpeq_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v2f32" + )] + fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrecpe_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { - unsafe { - let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f32" + )] + fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; } + unsafe { _vrecpeq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { - unsafe { transmute(a) } +pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v2i32" + )] + fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t; + } + unsafe { _vrecpe_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urecpe) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { - unsafe { - let ret_val: float16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v4i32" + )] + fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t; } + unsafe { _vrecpeq_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -42388,20 +41992,27 @@ pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { - unsafe { transmute(a) } +pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f16" + )] + fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + } + unsafe { _vrecps_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0") @@ -42411,23 +42022,26 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { - unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v8f16" + )] + fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; } + unsafe { _vrecpsq_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] #[cfg_attr( not(target_arch = "arm"), @@ -42437,19 +42051,26 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v2f32" + )] + fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + unsafe { _vrecps_f32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frecps) )] #[cfg_attr( not(target_arch = "arm"), @@ -42459,16 +42080,20 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { - unsafe { - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f32" + )] + fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } + unsafe { _vrecpsq_f32(a, b) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42478,19 +42103,19 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42500,23 +42125,19 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42526,19 +42147,19 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42548,23 +42169,19 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42574,19 +42191,19 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42596,23 +42213,19 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42622,19 +42235,19 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42644,22 +42257,19 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42669,19 +42279,19 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42691,23 +42301,19 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42717,19 +42323,19 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42739,23 +42345,19 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42765,19 +42367,19 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42787,23 +42389,19 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42813,19 +42411,19 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42835,22 +42433,19 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42860,19 +42455,19 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42882,23 +42477,19 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42908,19 +42499,19 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42930,23 +42521,19 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { - unsafe { - let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42956,19 +42543,19 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -42978,22 +42565,19 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43003,19 +42587,19 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43025,27 +42609,19 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43055,19 +42631,19 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43077,23 +42653,19 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43103,19 +42675,19 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43125,23 +42697,19 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43151,19 +42719,19 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43173,23 +42741,19 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43199,19 +42763,19 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43221,27 +42785,19 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43251,19 +42807,19 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43273,23 +42829,19 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43299,19 +42851,19 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43321,23 +42873,19 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43347,19 +42895,19 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43369,23 +42917,19 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43395,19 +42939,19 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43417,27 +42961,19 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43447,19 +42983,19 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43469,23 +43005,19 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { - unsafe { - let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43495,19 +43027,19 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43517,23 +43049,19 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43549,13 +43077,12 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { +pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43571,17 +43098,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43597,13 +43119,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { +pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43619,17 +43140,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43645,13 +43161,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { +pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43667,16 +43182,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43692,13 +43203,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { +pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43714,17 +43224,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43740,13 +43245,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { +pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43762,17 +43266,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43788,13 +43287,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { +pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43810,17 +43308,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43836,13 +43329,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { +pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43858,16 +43350,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43883,13 +43371,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { +pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43905,17 +43392,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43931,13 +43413,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { +pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43953,17 +43434,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -43979,13 +43455,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { +pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44001,18 +43476,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44028,13 +43497,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { +pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44050,18 +43518,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44077,13 +43539,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { +pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44099,18 +43560,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44126,13 +43581,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { +pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44148,18 +43602,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44175,13 +43623,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { +pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44197,22 +43644,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44228,13 +43665,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { +pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44250,18 +43686,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44277,13 +43707,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { +pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44299,18 +43728,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44326,13 +43749,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { +pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44348,18 +43770,12 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44375,13 +43791,12 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { +pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44397,22 +43812,12 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44428,13 +43833,12 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { +pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44450,18 +43854,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44477,13 +43875,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { +pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44499,19 +43896,14 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -44519,21 +43911,21 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -44541,25 +43933,21 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -44567,21 +43955,21 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -44589,25 +43977,21 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -44615,21 +43999,21 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -44637,22 +44021,19 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44668,13 +44049,12 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { +pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44690,17 +44070,12 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44716,13 +44091,12 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { +pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44738,17 +44112,12 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44764,13 +44133,12 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { +pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44786,17 +44154,12 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44812,13 +44175,12 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { +pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44834,16 +44196,12 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44859,13 +44217,12 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { +pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44881,17 +44238,12 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44907,13 +44259,12 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { +pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44929,17 +44280,12 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44955,13 +44301,12 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { +pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -44977,17 +44322,12 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45003,13 +44343,12 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { +pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45025,21 +44364,12 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45055,13 +44385,12 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { +pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45077,17 +44406,12 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45103,13 +44427,12 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { +pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45125,17 +44448,12 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45151,13 +44469,12 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { +pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45173,21 +44490,12 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45203,13 +44511,12 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { +pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45225,17 +44532,12 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45251,13 +44553,12 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { +pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45273,17 +44574,12 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45299,13 +44595,12 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { +pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45321,17 +44616,12 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45347,13 +44637,12 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { +pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45369,21 +44658,12 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45399,13 +44679,12 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { +pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45421,17 +44700,12 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45447,13 +44721,12 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { +pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45469,17 +44742,12 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45495,13 +44763,12 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { +pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45517,17 +44784,12 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45543,13 +44805,12 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { +pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45565,17 +44826,12 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45591,13 +44847,12 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { +pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45613,16 +44868,12 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45638,13 +44889,12 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { +pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45660,17 +44910,12 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45686,13 +44931,12 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { +pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45708,17 +44952,12 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45734,13 +44973,12 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { +pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45756,17 +44994,12 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45782,13 +45015,12 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { +pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45804,16 +45036,12 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45829,13 +45057,12 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { +pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45851,17 +45078,12 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45877,13 +45099,12 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { +pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45899,17 +45120,12 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45925,13 +45141,12 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { +pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45947,17 +45162,12 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45973,13 +45183,12 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { +pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -45995,21 +45204,12 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46025,13 +45225,12 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { +pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46047,17 +45246,12 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46073,13 +45267,12 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { +pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46095,17 +45288,12 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46121,13 +45309,12 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { +pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46143,21 +45330,12 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46173,13 +45351,12 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { +pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46195,17 +45372,12 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46221,13 +45393,12 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { +pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46243,17 +45414,12 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46269,13 +45435,12 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { +pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46291,17 +45456,12 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46317,13 +45477,12 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { +pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46339,21 +45498,12 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46369,13 +45519,12 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { +pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46391,17 +45540,12 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46417,13 +45561,12 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { +pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46439,16 +45582,12 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46464,13 +45603,12 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { +pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46486,16 +45624,12 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { - unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46511,13 +45645,12 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { +pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46534,15 +45667,11 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { - unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46562,9 +45691,8 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46580,16 +45708,12 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46605,13 +45729,12 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { +pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46627,16 +45750,12 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { - unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46652,13 +45771,12 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { +pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46674,16 +45792,12 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { - unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46699,13 +45813,12 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { +pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46721,14 +45834,11 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { - unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -46745,13 +45855,12 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { +pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46767,13 +45876,12 @@ pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { +pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46789,16 +45897,12 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46814,13 +45918,12 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { +pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46836,16 +45939,12 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46861,13 +45960,12 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { +pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46883,17 +45981,12 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46909,13 +46002,12 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { +pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46931,21 +46023,12 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46961,13 +46044,12 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { +pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -46983,17 +46065,12 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47009,13 +46086,12 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { +pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47031,17 +46107,12 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47057,13 +46128,12 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { +pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47079,21 +46149,12 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47109,13 +46170,12 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { +pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47131,17 +46191,12 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47157,13 +46212,12 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { +pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47179,17 +46233,12 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47205,13 +46254,12 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { +pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47227,17 +46275,12 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47253,13 +46296,12 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { +pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47275,21 +46317,12 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47305,13 +46338,12 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { +pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47327,17 +46359,12 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47353,13 +46380,12 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { +pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47375,17 +46401,12 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47401,13 +46422,12 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { +pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47423,17 +46443,12 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47449,13 +46464,12 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { +pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47471,17 +46485,12 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47497,13 +46506,12 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { +pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47519,17 +46527,12 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47545,13 +46548,12 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { +pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47567,16 +46569,12 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47592,13 +46590,12 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { +pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47614,17 +46611,12 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47640,13 +46632,12 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { +pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47662,17 +46653,12 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47688,13 +46674,12 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { +pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47710,16 +46695,12 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47735,13 +46716,12 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { +pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47757,17 +46737,12 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47783,13 +46758,12 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { +pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47805,17 +46779,12 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47831,13 +46800,12 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { +pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47853,18 +46821,12 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47880,13 +46842,12 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { +pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47902,22 +46863,12 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47933,13 +46884,12 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { +pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47955,18 +46905,12 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -47982,13 +46926,12 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { +pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48004,18 +46947,12 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48031,13 +46968,12 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { +pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48053,18 +46989,12 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48080,13 +47010,12 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { +pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48102,18 +47031,12 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48129,13 +47052,12 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { +pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48151,18 +47073,12 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48178,13 +47094,12 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { +pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48200,18 +47115,12 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48227,13 +47136,12 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { +pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48249,22 +47157,12 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48280,13 +47178,12 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { +pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48302,18 +47199,12 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { - unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48329,13 +47220,12 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { +pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48351,17 +47241,12 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48377,13 +47262,12 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { +pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48399,17 +47283,12 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48425,13 +47304,12 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { +pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48447,17 +47325,12 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48473,13 +47346,12 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { +pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48495,17 +47367,12 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48521,13 +47388,12 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { +pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48543,16 +47409,12 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48568,13 +47430,12 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { +pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48590,17 +47451,12 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48616,13 +47472,12 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { +pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48638,17 +47493,12 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48664,13 +47514,12 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { +pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48686,16 +47535,12 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48711,13 +47556,12 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { +pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48733,17 +47577,12 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48759,13 +47598,12 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { +pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48781,17 +47619,12 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { - unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48807,13 +47640,12 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { +pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48829,17 +47661,12 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48855,13 +47682,12 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { +pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48877,21 +47703,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48907,13 +47724,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { +pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48929,17 +47745,12 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48955,13 +47766,12 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { +pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -48977,17 +47787,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49003,13 +47808,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { +pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49025,17 +47829,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49051,13 +47850,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { +pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49073,21 +47871,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49103,13 +47892,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { +pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49125,17 +47913,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49151,13 +47934,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { +pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49173,17 +47955,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49199,13 +47976,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { +pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49221,21 +47997,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49251,13 +48018,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { +pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49273,17 +48039,12 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49299,13 +48060,12 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { +pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49321,17 +48081,12 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49347,13 +48102,12 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { +pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49369,17 +48123,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49395,13 +48144,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { +pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49417,17 +48165,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49443,13 +48186,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { +pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49465,17 +48207,12 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49491,13 +48228,12 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { +pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49513,16 +48249,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49538,13 +48270,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { +pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49560,17 +48291,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49586,13 +48312,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { +pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49608,17 +48333,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49634,13 +48354,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { +pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49656,16 +48375,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49681,13 +48396,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { +pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49703,17 +48417,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49729,13 +48438,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { +pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49751,17 +48459,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { - unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49777,13 +48480,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { +pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49799,17 +48501,12 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49825,13 +48522,12 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { +pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49847,21 +48543,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49877,13 +48564,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { +pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49899,17 +48585,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49925,13 +48606,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { +pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49947,17 +48627,12 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49973,13 +48648,12 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { +pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -49995,17 +48669,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -50021,13 +48690,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { +pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -50043,21 +48711,12 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] @@ -50073,15 +48732,14 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { +pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50095,19 +48753,14 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50121,15 +48774,14 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { +pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50143,19 +48795,14 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50169,15 +48816,14 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { +pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50191,23 +48837,14 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50221,15 +48858,14 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { +pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50243,19 +48879,14 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50269,15 +48900,14 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { +pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50291,18 +48921,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { - unsafe { - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50316,15 +48942,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { +pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50338,18 +48963,14 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { - unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + unsafe { transmute(a) } } #[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -50363,19 +48984,19 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { +pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { unsafe { transmute(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] #[inline] -#[cfg(target_endian = "big")] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50385,22 +49006,19 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { - unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] #[inline] -#[cfg(target_endian = "little")] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50410,19 +49028,23 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] #[inline] -#[cfg(target_endian = "big")] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50432,21 +49054,19 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50456,19 +49076,23 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50478,19 +49102,19 @@ pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50500,22 +49124,23 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { +pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { unsafe { - let ret_val: uint8x8_t = transmute(a); + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50525,19 +49150,19 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50547,22 +49172,29 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { +pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { unsafe { - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50572,19 +49204,19 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50594,22 +49226,29 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { +pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = + simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50619,19 +49258,19 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev16) )] #[cfg_attr( not(target_arch = "arm"), @@ -50641,22 +49280,29 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { +pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50666,19 +49312,19 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50688,22 +49334,23 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { +pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { unsafe { - let ret_val: poly16x4_t = transmute(a); + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50713,19 +49360,19 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50735,23 +49382,23 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { +pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50761,19 +49408,19 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50783,27 +49430,23 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { +pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] -#[inline] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] +#[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50813,19 +49456,19 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50835,23 +49478,23 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { +pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50861,19 +49504,19 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50883,23 +49526,23 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { +pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50909,19 +49552,19 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50931,23 +49574,23 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { +pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50957,19 +49600,19 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -50979,27 +49622,23 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { +pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51009,19 +49648,19 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51031,23 +49670,29 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { +pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51057,19 +49702,19 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51079,23 +49724,23 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { +pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51105,19 +49750,19 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51127,10 +49772,12 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { +pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]); simd_shuffle!( ret_val, ret_val, @@ -51138,16 +49785,16 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51157,19 +49804,19 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51179,23 +49826,23 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { +pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51205,19 +49852,19 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { - unsafe { transmute(a) } +pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev32) )] #[cfg_attr( not(target_arch = "arm"), @@ -51227,23 +49874,29 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { +pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = + simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51253,19 +49906,19 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51275,23 +49928,23 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { +pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: float32x2_t = simd_shuffle!(a, a, [1, 0]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51301,19 +49954,19 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51323,23 +49976,23 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { +pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51349,19 +50002,19 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51371,23 +50024,23 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { +pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { unsafe { let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let ret_val: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51397,19 +50050,19 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51419,22 +50072,23 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { +pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51444,19 +50098,19 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51466,23 +50120,23 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { +pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: int32x2_t = simd_shuffle!(a, a, [1, 0]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51492,19 +50146,19 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51514,23 +50168,23 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { +pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51540,19 +50194,19 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51562,23 +50216,23 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { +pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51588,19 +50242,19 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { + unsafe { simd_shuffle!(a, a, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51610,22 +50264,23 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { +pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let ret_val: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51635,19 +50290,19 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51657,23 +50312,23 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { +pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51683,19 +50338,19 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51705,24 +50360,23 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { +pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51732,19 +50386,19 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51754,28 +50408,23 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { +pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51785,19 +50434,19 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51807,24 +50456,29 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { +pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { unsafe { let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let ret_val: poly8x16_t = + simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51834,19 +50488,19 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51856,24 +50510,23 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { +pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51883,19 +50536,19 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51905,24 +50558,23 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { +pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51932,19 +50584,19 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51954,11 +50606,12 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { +pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { unsafe { - let a: poly8x16_t = + let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); + let ret_val: int8x16_t = + simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]); simd_shuffle!( ret_val, ret_val, @@ -51966,16 +50619,16 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -51985,19 +50638,19 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -52007,24 +50660,23 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { +pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -52034,19 +50686,19 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { + unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -52056,24 +50708,23 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { +pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -52083,19 +50734,19 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { + unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[doc = "Reversing vector elements (swap endianness)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] #[cfg_attr( not(target_arch = "arm"), @@ -52105,121 +50756,128 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { +pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { unsafe { - let a: poly8x16_t = + let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let ret_val: uint8x16_t = + simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { + unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[doc = "Reverse elements in 64-bit doublewords"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rev64) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: float32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52229,19 +50887,26 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] + fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vrhadd_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52251,23 +50916,26 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] + fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } + unsafe { _vrhaddq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52277,19 +50945,26 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vrhadd_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52299,23 +50974,26 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } + unsafe { _vrhaddq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52325,19 +51003,26 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vrhadd_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52347,23 +51032,26 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } + unsafe { _vrhaddq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52373,19 +51061,26 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { - unsafe { transmute(a) } +pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] + fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + unsafe { _vrhadd_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52395,22 +51090,26 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] + fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; } + unsafe { _vrhaddq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52420,19 +51119,26 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] + fn _vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } + unsafe { _vrhadd_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52442,23 +51148,26 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] + fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; } + unsafe { _vrhaddq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52468,19 +51177,26 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] + fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } + unsafe { _vrhadd_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urhadd) )] #[cfg_attr( not(target_arch = "arm"), @@ -52490,71 +51206,84 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] + fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } + unsafe { _vrhaddq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrndn_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v4f16" + )] + fn _vrndn_f16(a: float16x4_t) -> float16x4_t; + } + unsafe { _vrndn_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v8f16" + )] + fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; } + unsafe { _vrndnq_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] #[cfg_attr( not(target_arch = "arm"), @@ -52564,19 +51293,25 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { - unsafe { transmute(a) } +pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v2f32" + )] + fn _vrndn_f32(a: float32x2_t) -> float32x2_t; + } + unsafe { _vrndn_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frintn) )] #[cfg_attr( not(target_arch = "arm"), @@ -52586,22 +51321,25 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) +pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), + link_name = "llvm.roundeven.v4f32" + )] + fn _vrndnq_f32(a: float32x4_t) -> float32x4_t; } + unsafe { _vrndnq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52611,19 +51349,26 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i8" + )] + fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vrshl_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52633,23 +51378,26 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { - unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v16i8" + )] + fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } + unsafe { _vrshlq_s8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52659,19 +51407,26 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { - unsafe { transmute(a) } +pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v4i16" + )] + fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vrshl_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52681,23 +51436,26 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: float32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i16" + )] + fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } + unsafe { _vrshlq_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52707,19 +51465,26 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i32" + )] + fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vrshl_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52729,27 +51494,26 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v4i32" + )] + fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } + unsafe { _vrshlq_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52759,19 +51523,26 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v1i64" + )] + fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vrshl_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52781,23 +51552,26 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i64" + )] + fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; } + unsafe { _vrshlq_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52807,19 +51581,26 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i8" + )] + fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vrshl_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52829,23 +51610,26 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v16i8" + )] + fn _vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; } + unsafe { _vrshlq_u8(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52855,19 +51639,26 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i16" + )] + fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vrshl_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52877,23 +51668,26 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i16" + )] + fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; } + unsafe { _vrshlq_u16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52903,19 +51697,26 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i32" + )] + fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vrshl_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52925,27 +51726,26 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i32" + )] + fn _vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; } + unsafe { _vrshlq_u32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52955,19 +51755,26 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v1i64" + )] + fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vrshl_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -52977,24 +51784,28 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i64" + )] + fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; } + unsafe { _vrshlq_u64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53003,20 +51814,21 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_s8(a, vdup_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53025,24 +51837,21 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + vrshlq_s8(a, vdupq_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53051,20 +51860,21 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { - unsafe { transmute(a) } +pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + vrshl_s16(a, vdup_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53073,24 +51883,21 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + vrshlq_s16(a, vdupq_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53099,20 +51906,21 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + vrshl_s32(a, vdup_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53121,28 +51929,21 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { - unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + vrshlq_s32(a, vdupq_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53151,20 +51952,21 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { - unsafe { transmute(a) } +pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + vrshl_s64(a, vdup_n_s64(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53173,27 +51975,21 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { - unsafe { - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) - } +pub fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + vrshlq_s64(a, vdupq_n_s64(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53202,20 +51998,21 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { - unsafe { transmute(a) } +pub fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_u8(a, vdup_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53224,23 +52021,21 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { - unsafe { - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + vrshlq_u8(a, vdupq_n_s8(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53249,20 +52044,21 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { - unsafe { transmute(a) } +pub fn vrshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + vrshl_u16(a, vdup_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53271,23 +52067,21 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { - unsafe { - let ret_val: int32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + vrshlq_u16(a, vdupq_n_s16(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53296,20 +52090,21 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { - unsafe { transmute(a) } +pub fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + vrshl_u32(a, vdup_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53318,23 +52113,21 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { - unsafe { - let ret_val: int64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + vrshlq_u32(a, vdupq_n_s32(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53343,20 +52136,21 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + vrshl_u64(a, vdup_n_s64(-N as _)) } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[doc = "Unsigned rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(urshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53365,27 +52159,126 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { - unsafe { - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + vrshlq_u64(a, vdupq_n_s64(-N as _)) +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] + fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; } + unsafe { _vrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] + fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + unsafe { _vrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] + fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } + unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v8i8" + )] + fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; + } + unsafe { _vrshrn_n_s16(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v4i16" + )] + fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; + } + unsafe { _vrshrn_n_s32(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rshrn.v2i32" + )] + fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; + } + unsafe { _vrshrn_n_s64(a, N) } +} +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rshrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53394,20 +52287,21 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { transmute(vrshrn_n_s16::(transmute(a))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rshrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53416,23 +52310,21 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { - unsafe { - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { transmute(vrshrn_n_s32::(transmute(a))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[doc = "Rounding shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rshrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53441,66 +52333,79 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { transmute(vrshrn_n_s64::(transmute(a))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { - unsafe { - let ret_val: uint32x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f16" + )] + fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; } + unsafe { _vrsqrte_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v8f16" + )] + fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + } + unsafe { _vrsqrteq_f16(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -53510,22 +52415,26 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { - unsafe { - let ret_val: uint64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v2f32" + )] + fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t; } + unsafe { _vrsqrte_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -53535,19 +52444,26 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f32" + )] + fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t; + } + unsafe { _vrsqrteq_f32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[doc = "Unsigned reciprocal square root estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -53557,26 +52473,26 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { - unsafe { - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) +pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ursqrte.v2i32" + )] + fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t; } + unsafe { _vrsqrte_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[doc = "Unsigned reciprocal square root estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursqrte) )] #[cfg_attr( not(target_arch = "arm"), @@ -53586,66 +52502,86 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ursqrte.v4i32" + )] + fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t; + } + unsafe { _vrsqrteq_u32(a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { - unsafe { - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f16" + )] + fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; } + unsafe { _vrsqrts_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { - unsafe { transmute(a) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v8f16" + )] + fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + } + unsafe { _vrsqrtsq_f16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), @@ -53655,22 +52591,26 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { - unsafe { - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v2f32" + )] + fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } + unsafe { _vrsqrts_f32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(frsqrts) )] #[cfg_attr( not(target_arch = "arm"), @@ -53680,20 +52620,28 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f32" + )] + fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + unsafe { _vrsqrtsq_f32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53702,23 +52650,21 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshr_n_s8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53727,20 +52673,21 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshrq_n_s8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53749,24 +52696,21 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshr_n_s16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53775,20 +52719,21 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshrq_n_s16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53797,25 +52742,21 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - unsafe { - let a: int8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshr_n_s32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53824,20 +52765,21 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshrq_n_s32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53846,23 +52788,21 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - unsafe { - let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshr_n_s64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(srsra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53871,20 +52811,21 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshrq_n_s64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53893,23 +52834,21 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshr_n_u8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53918,20 +52857,21 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vrshrq_n_u8::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53940,24 +52880,21 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - unsafe { - let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } +pub fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshr_n_u16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53966,20 +52903,21 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vrshrq_n_u16::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -53988,23 +52926,21 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) - } +pub fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshr_n_u32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54013,20 +52949,21 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vrshrq_n_u32::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54035,23 +52972,21 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) - } +pub fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshr_n_u64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(ursra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54060,19 +52995,19 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vrshrq_n_u64::(b)) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -54082,23 +53017,26 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) +pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v8i8" + )] + fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; } + unsafe { _vrsubhn_s16(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -54108,19 +53046,26 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v4i16" + )] + fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } + unsafe { _vrsubhn_s32(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -54130,22 +53075,26 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { - unsafe { - let a: int64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) +pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v2i32" + )] + fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; } + unsafe { _vrsubhn_s64(a, b) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -54155,19 +53104,18 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -54177,22 +53125,18 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) - } +pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(rsubhn) )] #[cfg_attr( not(target_arch = "arm"), @@ -54202,46 +53146,101 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { - unsafe { transmute(a) } +pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float16x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } +} +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] -pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { +#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: float16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54250,20 +53249,22 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54272,25 +53273,26 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { +pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: uint8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: float32x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54299,20 +53301,22 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54321,23 +53325,26 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { +pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: float32x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54346,20 +53353,22 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54368,23 +53377,26 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { +pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54393,20 +53405,22 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54415,24 +53429,31 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { +pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(LANE, 4); unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x16_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54441,20 +53462,22 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54463,23 +53486,26 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { +pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int16x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54488,20 +53514,22 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { - unsafe { transmute(a) } +pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54510,23 +53538,26 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { +pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54535,20 +53566,22 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54557,24 +53590,26 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { +pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int32x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54583,20 +53618,22 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { - unsafe { transmute(a) } +pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54605,23 +53642,26 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { +pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: int32x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54630,20 +53670,22 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54652,23 +53694,26 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { +pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: int64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: int64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54677,20 +53722,22 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { - unsafe { transmute(a) } +pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54699,24 +53746,26 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { +pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54725,20 +53774,22 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54747,25 +53798,31 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { +pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(LANE, 4); unsafe { - let a: poly8x16_t = - simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint8x16_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54774,20 +53831,22 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { - unsafe { transmute(a) } +pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54796,23 +53855,26 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { +pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - transmute(a) + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint16x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54821,20 +53883,22 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { - unsafe { transmute(a) } +pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54843,23 +53907,26 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { +pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - transmute(a) + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: uint16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54868,20 +53935,22 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { - unsafe { transmute(a) } +pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54890,24 +53959,26 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { +pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly64x2_t = transmute(a); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint32x2_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54916,20 +53987,22 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { - unsafe { transmute(a) } +pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54938,23 +54011,26 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { +pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let ret_val: int8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: uint32x4_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54963,20 +54039,22 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { - unsafe { transmute(a) } +pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -54985,23 +54063,26 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { +pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let ret_val: int16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: uint64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55010,20 +54091,22 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { - unsafe { transmute(a) } +pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55032,23 +54115,26 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { +pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let ret_val: int32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55057,20 +54143,22 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { - unsafe { transmute(a) } +pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55079,23 +54167,31 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { +pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); unsafe { - let ret_val: uint8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly8x16_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!( + ret_val, + ret_val, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55104,20 +54200,22 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { - unsafe { transmute(a) } +pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55126,23 +54224,26 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { +pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); unsafe { - let ret_val: uint16x4_t = transmute(a); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let ret_val: poly16x4_t = simd_insert!(b, LANE as u32, a); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55151,20 +54252,22 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { - unsafe { transmute(a) } +pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55173,23 +54276,25 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { +pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); unsafe { - let ret_val: uint32x2_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: poly16x8_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55198,20 +54303,21 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - unsafe { transmute(a) } +pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55220,23 +54326,21 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - unsafe { - let ret_val: poly8x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } +pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55245,20 +54349,22 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { - unsafe { transmute(a) } +pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] #[inline] -#[cfg(target_endian = "big")] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55267,23 +54373,22 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { - unsafe { - let ret_val: poly16x4_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } +pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { simd_insert!(b, LANE as u32, a) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] #[inline] -#[cfg(target_endian = "little")] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + assert_instr(nop, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55292,440 +54397,809 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { - unsafe { transmute(a) } -} -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { +pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - transmute(a) + let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]); + let ret_val: poly64x2_t = simd_insert!(b, LANE as u32, a); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[doc = "SHA1 hash update accelerator, choose."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1c))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { - unsafe { transmute(a) } +pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1c" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] + fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"] +#[doc = "SHA1 hash update accelerator, choose."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1c))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { +pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1c" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] + fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1cq_u32(hash_abcd, hash_e, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] +#[doc = "SHA1 fixed rotate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1h))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] +pub fn vsha1h_u32(hash_e: u32) -> u32 { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")] + fn _vsha1h_u32(hash_e: u32) -> u32; + } + unsafe { _vsha1h_u32(hash_e) } +} +#[doc = "SHA1 hash update accelerator, majority"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha1m))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { - unsafe { transmute(a) } +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1m" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] + fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"] +#[doc = "SHA1 hash update accelerator, majority"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1m))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { +pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1m" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] + fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1mq_u32(hash_abcd, hash_e, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[doc = "SHA1 hash update accelerator, parity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1p))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { - unsafe { transmute(a) } +pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1p" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] + fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"] +#[doc = "SHA1 hash update accelerator, parity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1p))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { +pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1p" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] + fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: int32x4_t = transmute(a); + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1pq_u32(hash_abcd, hash_e, wk); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { - unsafe { transmute(a) } +pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] + fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { +pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] + fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let w0_3: uint32x4_t = simd_shuffle!(w0_3, w0_3, [3, 2, 1, 0]); + let w4_7: uint32x4_t = simd_shuffle!(w4_7, w4_7, [3, 2, 1, 0]); + let w8_11: uint32x4_t = simd_shuffle!(w8_11, w8_11, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1su0q_u32(w0_3, w4_7, w8_11); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[doc = "SHA1 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { - unsafe { transmute(a) } +pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] + fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha1su1q_u32(tw0_3, w12_15) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[doc = "SHA1 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha1su1))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { +pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha1su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] + fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let tw0_3: uint32x4_t = simd_shuffle!(tw0_3, tw0_3, [3, 2, 1, 0]); + let w12_15: uint32x4_t = simd_shuffle!(w12_15, w12_15, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha1su1q_u32(tw0_3, w12_15); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[doc = "SHA1 schedule update accelerator, upper part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { - unsafe { transmute(a) } +pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h2" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] + fn _vsha256h2q_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[doc = "SHA1 schedule update accelerator, upper part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h2))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { +pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h2" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] + fn _vsha256h2q_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: uint32x4_t = transmute(a); + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let hash_efgh: uint32x4_t = simd_shuffle!(hash_efgh, hash_efgh, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256h2q_u32(hash_abcd, hash_efgh, wk); simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { - unsafe { transmute(a) } +pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] + fn _vsha256hq_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } + unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[doc = "SHA1 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256h))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { +pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256h" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] + fn _vsha256hq_u32( + hash_abcd: uint32x4_t, + hash_efgh: uint32x4_t, + wk: uint32x4_t, + ) -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly8x16_t = transmute(a); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]); + let hash_efgh: uint32x4_t = simd_shuffle!(hash_efgh, hash_efgh, [3, 2, 1, 0]); + let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256hq_u32(hash_abcd, hash_efgh, wk); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] +#[doc = "SHA256 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] -pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { - unsafe { transmute(a) } +pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] + fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + } + unsafe { _vsha256su0q_u32(w0_3, w4_7) } } -#[doc = "Vector reinterpret cast operation"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"] +#[doc = "SHA256 schedule update accelerator, first part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,aes")] +#[target_feature(enable = "sha2")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(test, assert_instr(sha256su0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") )] +pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su0" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] + fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + } + unsafe { + let w0_3: uint32x4_t = simd_shuffle!(w0_3, w0_3, [3, 2, 1, 0]); + let w4_7: uint32x4_t = simd_shuffle!(w4_7, w4_7, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256su0q_u32(w0_3, w4_7); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + } +} +#[doc = "SHA256 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256su1))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] + fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) + -> uint32x4_t; + } + unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } +} +#[doc = "SHA256 schedule update accelerator, second part."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "sha2")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(test, assert_instr(sha256su1))] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") +)] +pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sha256su1" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] + fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) + -> uint32x4_t; + } unsafe { - let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]); - let ret_val: poly16x8_t = transmute(a); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let tw0_3: uint32x4_t = simd_shuffle!(tw0_3, tw0_3, [3, 2, 1, 0]); + let w8_11: uint32x4_t = simd_shuffle!(w8_11, w8_11, [3, 2, 1, 0]); + let w12_15: uint32x4_t = simd_shuffle!(w12_15, w12_15, [3, 2, 1, 0]); + let ret_val: uint32x4_t = _vsha256su1q_u32(tw0_3, w8_11, w12_15); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) } +} +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55734,19 +55208,21 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdup_n_s8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55755,19 +55231,21 @@ pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdupq_n_s8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55776,19 +55254,21 @@ pub fn vrev16_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdup_n_s16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55797,19 +55277,21 @@ pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +pub fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdupq_n_s16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55818,19 +55300,21 @@ pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +pub fn vshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdup_n_s32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev16) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55839,19 +55323,21 @@ pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } +pub fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdupq_n_s32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55860,19 +55346,21 @@ pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdup_n_s64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55881,19 +55369,21 @@ pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdupq_n_s64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55902,19 +55392,21 @@ pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdup_n_u8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55923,19 +55415,21 @@ pub fn vrev32_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { simd_shl(a, vdupq_n_u8(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55944,19 +55438,21 @@ pub fn vrev32_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdup_n_u16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55965,19 +55461,21 @@ pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { simd_shl(a, vdupq_n_u16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -55986,19 +55484,21 @@ pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdup_n_u32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56007,19 +55507,21 @@ pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + unsafe { simd_shl(a, vdupq_n_u32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56028,19 +55530,21 @@ pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdup_n_u64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"] +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(shl, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56049,18 +55553,19 @@ pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + unsafe { simd_shl(a, vdupq_n_u64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56070,18 +55575,26 @@ pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } +pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i8" + )] + fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + unsafe { _vshl_s8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev32) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56091,18 +55604,26 @@ pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } +pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v16i8" + )] + fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + unsafe { _vshlq_s8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56112,18 +55633,26 @@ pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { - unsafe { simd_shuffle!(a, a, [1, 0]) } +pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i16" + )] + fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + unsafe { _vshl_s16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56133,18 +55662,26 @@ pub fn vrev64_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i16" + )] + fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + unsafe { _vshlq_s16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56154,18 +55691,26 @@ pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i32" + )] + fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + unsafe { _vshl_s32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56175,18 +55720,26 @@ pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i32" + )] + fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + unsafe { _vshlq_s32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56196,18 +55749,26 @@ pub fn vrev64_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { - unsafe { simd_shuffle!(a, a, [1, 0]) } +pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v1i64" + )] + fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + unsafe { _vshl_s64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"] +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56217,18 +55778,26 @@ pub fn vrev64_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i64" + )] + fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + unsafe { _vshlq_s64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56238,18 +55807,26 @@ pub fn vrev64_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v8i8" + )] + fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } + unsafe { _vshl_u8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56259,18 +55836,26 @@ pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { - unsafe { simd_shuffle!(a, a, [1, 0]) } +pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v16i8" + )] + fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } + unsafe { _vshlq_u8(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56280,18 +55865,26 @@ pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } +pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v4i16" + )] + fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } + unsafe { _vshl_u16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56301,18 +55894,26 @@ pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v8i16" + )] + fn _vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } + unsafe { _vshlq_u16(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56322,18 +55923,26 @@ pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v2i32" + )] + fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } + unsafe { _vshl_u32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56343,18 +55952,26 @@ pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v4i32" + )] + fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } + unsafe { _vshlq_u32(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56364,18 +55981,26 @@ pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v1i64" + )] + fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } + unsafe { _vshl_u64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"] +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushl) )] #[cfg_attr( not(target_arch = "arm"), @@ -56385,19 +56010,28 @@ pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v2i64" + )] + fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } + unsafe { _vshlq_u64(a, b) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56406,19 +56040,21 @@ pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +pub fn vshll_n_s16(a: int16x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 16); + unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56427,19 +56063,21 @@ pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshll_n_s32(a: int32x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 32); + unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(sshll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56448,19 +56086,21 @@ pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { - unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) } +pub fn vshll_n_s8(a: int8x8_t) -> int16x8_t { + static_assert!(N >= 0 && N <= 8); + unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) } } -#[doc = "Reversing vector elements (swap endianness)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushll, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56469,63 +56109,67 @@ pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { - unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } +pub fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 16); + unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) } } -#[doc = "Reverse elements in 64-bit doublewords"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushll, N = 2) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrev64_f16(a: float16x4_t) -> float16x4_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) } +pub fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 32); + unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) } } -#[doc = "Reverse elements in 64-bit doublewords"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"] +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rev64) + assert_instr(ushll, N = 2) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { - unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +pub fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { + static_assert!(N >= 0 && N <= 8); + unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56534,27 +56178,22 @@ pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] - fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vrhadd_s8(a, b) } +pub fn vshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + unsafe { simd_shr(a, vdup_n_s8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56563,27 +56202,22 @@ pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] - fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vrhaddq_s8(a, b) } +pub fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + unsafe { simd_shr(a, vdupq_n_s8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56592,27 +56226,22 @@ pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] - fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vrhadd_s16(a, b) } +pub fn vshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + unsafe { simd_shr(a, vdup_n_s16(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56621,27 +56250,22 @@ pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] - fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vrhaddq_s16(a, b) } +pub fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + unsafe { simd_shr(a, vdupq_n_s16(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56650,27 +56274,22 @@ pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] - fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vrhadd_s32(a, b) } +pub fn vshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + unsafe { simd_shr(a, vdup_n_s32(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56679,27 +56298,22 @@ pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srhadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] - fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vrhaddq_s32(a, b) } +pub fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + unsafe { simd_shr(a, vdupq_n_s32(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56708,27 +56322,22 @@ pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] - fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } - unsafe { _vrhadd_u8(a, b) } +pub fn vshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + unsafe { simd_shr(a, vdup_n_s64(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(sshr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56737,27 +56346,22 @@ pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] - fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } - unsafe { _vrhaddq_u8(a, b) } +pub fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + unsafe { simd_shr(a, vdupq_n_s64(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56766,27 +56370,26 @@ pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v4i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] - fn _vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } - unsafe { _vrhadd_u16(a, b) } +pub fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { + return vdup_n_u8(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56795,27 +56398,26 @@ pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v8i16" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] - fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } - unsafe { _vrhaddq_u16(a, b) } +pub fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { + return vdupq_n_u8(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u8(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56824,27 +56426,26 @@ pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v2i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] - fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } - unsafe { _vrhadd_u32(a, b) } +pub fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { + return vdup_n_u16(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u16(n as _)) } } -#[doc = "Rounding halving add"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urhadd) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56853,85 +56454,26 @@ pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urhadd.v4i32" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] - fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } - unsafe { _vrhaddq_u32(a, b) } -} -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrndn_f16(a: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v4f16" - )] - fn _vrndn_f16(a: float16x4_t) -> float16x4_t; - } - unsafe { _vrndn_f16(a) } -} -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v8f16" - )] - fn _vrndnq_f16(a: float16x8_t) -> float16x8_t; - } - unsafe { _vrndnq_f16(a) } +pub fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { + return vdupq_n_u16(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u16(n as _)) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56940,26 +56482,26 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v2f32" - )] - fn _vrndn_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrndn_f32(a) } +pub fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { + return vdup_n_u32(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u32(n as _)) } } -#[doc = "Floating-point round to integral, to nearest with ties to even"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frintn) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56968,26 +56510,26 @@ pub fn vrndn_f32(a: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"), - link_name = "llvm.roundeven.v4f32" - )] - fn _vrndnq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrndnq_f32(a) } +pub fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { + return vdupq_n_u32(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u32(n as _)) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -56996,27 +56538,26 @@ pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v8i8" - )] - fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vrshl_s8(a, b) } +pub fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { + return vdup_n_u64(0); + } else { + N + }; + unsafe { simd_shr(a, vdup_n_u64(n as _)) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"] +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(ushr, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57025,27 +56566,26 @@ pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v16i8" - )] - fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vrshlq_s8(a, b) } +pub fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { + return vdupq_n_u64(0); + } else { + N + }; + unsafe { simd_shr(a, vdupq_n_u64(n as _)) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57054,27 +56594,21 @@ pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v4i16" - )] - fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vrshl_s16(a, b) } +pub fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57083,27 +56617,21 @@ pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v8i16" - )] - fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vrshlq_s16(a, b) } +pub fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57112,27 +56640,21 @@ pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v2i32" - )] - fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vrshl_s32(a, b) } +pub fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57141,27 +56663,21 @@ pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v4i32" - )] - fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vrshlq_s32(a, b) } +pub fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57170,27 +56686,21 @@ pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v1i64" - )] - fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } - unsafe { _vrshl_s64(a, b) } +pub fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) } } -#[doc = "Signed rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"] +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshl) + assert_instr(shrn, N = 2) )] +#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57199,27 +56709,261 @@ pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.srshl.v2i64" - )] - fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } - unsafe { _vrshlq_s64(a, b) } +pub fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"] +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + vshiftlins_v8i8::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + vshiftlins_v16i8::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + vshiftlins_v4i16::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + vshiftlins_v8i16::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 0 && N <= 31); + vshiftlins_v2i32::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 31); + vshiftlins_v4i32::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 0 && N <= 63); + vshiftlins_v1i64::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 63); + vshiftlins_v2i64::(a, b) +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vshiftlins_v2i32::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 31); + unsafe { transmute(vshiftlins_v4i32::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vshiftlins_v1i64::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 63); + unsafe { transmute(vshiftlins_v2i64::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 3); + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Left and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 4); + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +} +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57228,27 +56972,21 @@ pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v8i8" - )] - fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vrshl_u8(a, b) } +pub fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshr_n_s8::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57257,27 +56995,21 @@ pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v16i8" - )] - fn _vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vrshlq_u8(a, b) } +pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + unsafe { simd_add(a, vshrq_n_s8::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57286,27 +57018,21 @@ pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v4i16" - )] - fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vrshl_u16(a, b) } +pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshr_n_s16::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57315,27 +57041,21 @@ pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v8i16" - )] - fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vrshlq_u16(a, b) } +pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + unsafe { simd_add(a, vshrq_n_s16::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57344,27 +57064,21 @@ pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v2i32" - )] - fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vrshl_u32(a, b) } +pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshr_n_s32::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57373,27 +57087,21 @@ pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v4i32" - )] - fn _vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vrshlq_u32(a, b) } +pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + unsafe { simd_add(a, vshrq_n_s32::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57402,27 +57110,21 @@ pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v1i64" - )] - fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vrshl_u64(a, b) } +pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshr_n_s64::(b)) } } -#[doc = "Unsigned rounding shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"] +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshl) + assert_instr(ssra, N = 2) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57431,28 +57133,21 @@ pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.urshl.v2i64" - )] - fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vrshlq_u64(a, b) } +pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + unsafe { simd_add(a, vshrq_n_s64::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57461,21 +57156,21 @@ pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { +pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert!(N >= 1 && N <= 8); - vrshl_s8(a, vdup_n_s8(-N as _)) + unsafe { simd_add(a, vshr_n_u8::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57484,21 +57179,21 @@ pub fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { +pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - vrshlq_s8(a, vdupq_n_s8(-N as _)) + unsafe { simd_add(a, vshrq_n_u8::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57507,21 +57202,21 @@ pub fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { +pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert!(N >= 1 && N <= 16); - vrshl_s16(a, vdup_n_s16(-N as _)) + unsafe { simd_add(a, vshr_n_u16::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57530,21 +57225,21 @@ pub fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { +pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - vrshlq_s16(a, vdupq_n_s16(-N as _)) + unsafe { simd_add(a, vshrq_n_u16::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57553,21 +57248,21 @@ pub fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { +pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert!(N >= 1 && N <= 32); - vrshl_s32(a, vdup_n_s32(-N as _)) + unsafe { simd_add(a, vshr_n_u32::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57576,21 +57271,21 @@ pub fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { +pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - vrshlq_s32(a, vdupq_n_s32(-N as _)) + unsafe { simd_add(a, vshrq_n_u32::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"] -#[inline] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"] +#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57599,21 +57294,21 @@ pub fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { +pub fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { static_assert!(N >= 1 && N <= 64); - vrshl_s64(a, vdup_n_s64(-N as _)) + unsafe { simd_add(a, vshr_n_u64::(b)) } } -#[doc = "Signed rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"] +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srshr, N = 2) + assert_instr(usra, N = 2) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -57622,1300 +57317,1122 @@ pub fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t { +pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(N >= 1 && N <= 64); - vrshlq_s64(a, vdupq_n_s64(-N as _)) + unsafe { simd_add(a, vshrq_n_u64::(b)) } } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - vrshl_u8(a, vdup_n_s8(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(1 <= N && N <= 8); + vshiftrins_v8i8::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - vrshlq_u8(a, vdupq_n_s8(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(1 <= N && N <= 8); + vshiftrins_v16i8::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - vrshl_u16(a, vdup_n_s16(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(1 <= N && N <= 16); + vshiftrins_v4i16::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - vrshlq_u16(a, vdupq_n_s16(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(1 <= N && N <= 16); + vshiftrins_v8i16::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - vrshl_u32(a, vdup_n_s32(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(1 <= N && N <= 32); + vshiftrins_v2i32::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - vrshlq_u32(a, vdupq_n_s32(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(1 <= N && N <= 32); + vshiftrins_v4i32::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - vrshl_u64(a, vdup_n_s64(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(1 <= N && N <= 64); + vshiftrins_v1i64::(a, b) } -#[doc = "Unsigned rounding shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(urshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - vrshlq_u64(a, vdupq_n_s64(-N as _)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(1 <= N && N <= 64); + vshiftrins_v2i64::(a, b) } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] - fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } - unsafe { _vrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] - fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; - } - unsafe { _vrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] - fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; - } - unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rshrn.v8i8" - )] - fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; - } - unsafe { _vrshrn_n_s16(a, N) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rshrn.v4i16" - )] - fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; - } - unsafe { _vrshrn_n_s32(a, N) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(1 <= N && N <= 32); + unsafe { transmute(vshiftrins_v2i32::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rshrn.v2i32" - )] - fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; - } - unsafe { _vrshrn_n_s64(a, N) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(1 <= N && N <= 32); + unsafe { transmute(vshiftrins_v4i32::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rshrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { transmute(vrshrn_n_s16::(transmute(a))) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(1 <= N && N <= 64); + unsafe { transmute(vshiftrins_v1i64::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rshrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { transmute(vrshrn_n_s32::(transmute(a))) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(1 <= N && N <= 64); + unsafe { transmute(vshiftrins_v2i64::(transmute(a), transmute(b))) } } -#[doc = "Rounding shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rshrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { transmute(vrshrn_n_s64::(transmute(a))) } +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"] +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(1 <= N && N <= 8); + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } +} +#[doc = "Shift Right and Insert (immediate)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(1 <= N && N <= 16); + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { + vst1_v4f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { + vst1q_v8f16( + ptr as *const i8, + transmute(a), + crate::mem::align_of::() as i32, + ) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v4f16" - )] - fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")] + fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t); } - unsafe { _vrsqrte_f16(a) } + _vst1_f16_x2(a, b.0, b.1) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t { +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v8f16" - )] - fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")] + fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); } - unsafe { _vrsqrteq_f16(a) } + _vst1q_f16_x2(a, b.0, b.1) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v2f32" + link_name = "llvm.aarch64.neon.st1x2.v4f16.p0" )] - fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t; + fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); } - unsafe { _vrsqrte_f32(a) } + _vst1_f16_x2(b.0, b.1, a) } -#[doc = "Reciprocal square-root estimate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrte.v4f32" + link_name = "llvm.aarch64.neon.st1x2.v8f16.p0" )] - fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t; + fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); } - unsafe { _vrsqrteq_f32(a) } + _vst1q_f16_x2(b.0, b.1, a) } -#[doc = "Unsigned reciprocal square root estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t { +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ursqrte.v2i32" - )] - fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")] + fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); } - unsafe { _vrsqrte_u32(a) } + _vst1_f16_x3(a, b.0, b.1, b.2) } -#[doc = "Unsigned reciprocal square root estimate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursqrte) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { +#[cfg_attr(test, assert_instr(vst1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ursqrte.v4i32" - )] - fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")] + fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t); } - unsafe { _vrsqrteq_u32(a) } + _vst1q_f16_x3(a, b.0, b.1, b.2) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { +pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v4f16" + link_name = "llvm.aarch64.neon.st1x3.v4f16.p0" )] - fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t; + fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); } - unsafe { _vrsqrts_f16(a, b) } + _vst1_f16_x3(b.0, b.1, b.2, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] #[target_feature(enable = "neon,fp16")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { +pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v8f16" + link_name = "llvm.aarch64.neon.st1x3.v8f16.p0" )] - fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); } - unsafe { _vrsqrtsq_f16(a, b) } + _vst1q_f16_x3(b.0, b.1, b.2, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")] + fn _vst1_f16_x4( + ptr: *mut f16, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ); + } + _vst1_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")] + fn _vst1q_f16_x4( + ptr: *mut f16, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ); + } + _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v2f32" + link_name = "llvm.aarch64.neon.st1x4.v4f16.p0" )] - fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + fn _vst1_f16_x4( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + ptr: *mut f16, + ); } - unsafe { _vrsqrts_f32(a, b) } + _vst1_f16_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Floating-point reciprocal square root step"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(frsqrts) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.frsqrts.v4f32" + link_name = "llvm.aarch64.neon.st1x4.v8f16.p0" )] - fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + fn _vst1q_f16_x4( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + ptr: *mut f16, + ); } - unsafe { _vrsqrtsq_f32(a, b) } + _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshr_n_s8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2f32::(ptr as *const i8, transmute(a)) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshrq_n_s8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4f32::(ptr as *const i8, transmute(a)) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshr_n_s16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshrq_n_s16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshr_n_s32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshrq_n_s32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshr_n_s64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, a) } -#[doc = "Signed rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(srsra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshrq_n_s64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshr_n_u8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vrshrq_n_u8::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, a) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshr_n_u16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vrshrq_n_u16::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshr_n_u32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vrshrq_n_u32::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshr_n_u64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, transmute(a)) } -#[doc = "Unsigned rounding shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ursra, N = 2) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vrshrq_n_u64::(b)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, transmute(a)) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rsubhn.v8i8" - )] - fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; - } - unsafe { _vrsubhn_s16(a, b) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rsubhn.v4i16" - )] - fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; - } - unsafe { _vrsubhn_s32(a, b) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] +pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")] + fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t); + } + _vst1_f32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")] + fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t); + } + _vst1q_f32_x2(a, b.0, b.1) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")] #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.rsubhn.v2i32" + link_name = "llvm.aarch64.neon.st1x2.v2f32.p0" )] - fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; + fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32); } - unsafe { _vrsubhn_s64(a, b) } + _vst1_f32_x2(b.0, b.1, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4f32.p0" + )] + fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32); + } + _vst1q_f32_x2(b.0, b.1, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - unsafe { - let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vrsubhn_s16(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2f32.p0" + )] + fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); } + _vst1_f32_x3(b.0, b.1, b.2, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4f32.p0" + )] + fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); + } + _vst1q_f32_x3(b.0, b.1, b.2, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - unsafe { - let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); - let ret_val: uint16x4_t = transmute(vrsubhn_s32(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")] + fn _vst1_f32_x4( + ptr: *mut f32, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + ); } + _vst1_f32_x4(a, b.0, b.1, b.2, b.3) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")] + fn _vst1q_f32_x4( + ptr: *mut f32, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + ); + } + _vst1q_f32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2f32.p0" + )] + fn _vst1_f32_x4( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + ptr: *mut f32, + ); + } + _vst1_f32_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Rounding subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(rsubhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - unsafe { - let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]); - let ret_val: uint32x2_t = transmute(vrsubhn_s64(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4f32.p0" + )] + fn _vst1q_f32_x4( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + ptr: *mut f32, + ); } + _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0) )] #[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vset_lane_f16(a: f16, b: float16x4_t) -> float16x4_t { +pub unsafe fn vst1_lane_f16(a: *mut f16, b: float16x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0) )] #[rustc_legacy_const_generics(2)] -#[target_feature(enable = "neon,fp16")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { +pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58933,12 +58450,14 @@ pub fn vsetq_lane_f16(a: f16, b: float16x8_t) -> float16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { +pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58956,12 +58475,14 @@ pub fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { +pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -58979,12 +58500,14 @@ pub fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { +pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59002,12 +58525,14 @@ pub fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { +pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { static_assert_uimm_bits!(LANE, 4); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59025,12 +58550,14 @@ pub fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { +pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59048,12 +58575,14 @@ pub fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { +pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59071,12 +58600,14 @@ pub fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { +pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59094,12 +58625,14 @@ pub fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { +pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59117,12 +58650,14 @@ pub fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { +pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59140,12 +58675,14 @@ pub fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { +pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59163,12 +58700,14 @@ pub fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { +pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { static_assert_uimm_bits!(LANE, 4); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59186,12 +58725,14 @@ pub fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { +pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59209,12 +58750,14 @@ pub fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { +pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59232,12 +58775,14 @@ pub fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { +pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59255,12 +58800,14 @@ pub fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { +pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59278,12 +58825,14 @@ pub fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { +pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59301,12 +58850,14 @@ pub fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { +pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59324,12 +58875,14 @@ pub fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { +pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { static_assert_uimm_bits!(LANE, 4); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59347,12 +58900,14 @@ pub fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { +pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { static_assert_uimm_bits!(LANE, 2); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59370,15 +58925,17 @@ pub fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { +pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { static_assert_uimm_bits!(LANE, 3); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), @@ -59393,12 +58950,14 @@ pub fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { +pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59416,12 +58975,14 @@ pub fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { +pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -59439,21 +59000,22 @@ pub fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { +pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { static_assert!(LANE == 0); - unsafe { simd_insert!(b, LANE as u32, a) } + *a = simd_extract!(b, LANE as u32); } -#[doc = "Insert vector element from another vector element"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st1) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -59462,665 +59024,976 @@ pub fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(LANE, 1); - unsafe { simd_insert!(b, LANE as u32, a) } +pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) } -#[doc = "SHA1 hash update accelerator, choose."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] +#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1c))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] -pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1c" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")] - fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) } -} -#[doc = "SHA1 fixed rotate."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"] -#[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1h))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1h_u32(hash_e: u32) -> u32 { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1h" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")] - fn _vsha1h_u32(hash_e: u32) -> u32; - } - unsafe { _vsha1h_u32(hash_e) } +pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) } -#[doc = "SHA1 hash update accelerator, majority"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] +#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1m))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] -pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1m" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")] - fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) } -} -#[doc = "SHA1 hash update accelerator, parity"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"] -#[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1p))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1p" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")] - fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) } +pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) } -#[doc = "SHA1 schedule update accelerator, first part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] +#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1su0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] -pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1su0" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")] - fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) } -} -#[doc = "SHA1 schedule update accelerator, second part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"] -#[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha1su1))] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha1su1" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")] - fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; - } - unsafe { _vsha1su1q_u32(tw0_3, w12_15) } +pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) } -#[doc = "SHA1 schedule update accelerator, upper part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] +#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256h2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] -pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256h2" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")] - fn _vsha256h2q_u32( - hash_abcd: uint32x4_t, - hash_efgh: uint32x4_t, - wk: uint32x4_t, - ) -> uint32x4_t; - } - unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) } +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) } -#[doc = "SHA1 schedule update accelerator, first part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] +#[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256h))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] -pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t { +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256h" + link_name = "llvm.aarch64.neon.st1x2.v8i8.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")] - fn _vsha256hq_u32( - hash_abcd: uint32x4_t, - hash_efgh: uint32x4_t, - wk: uint32x4_t, - ) -> uint32x4_t; + fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8); } - unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) } + _vst1_s8_x2(b.0, b.1, a) } -#[doc = "SHA256 schedule update accelerator, first part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256su0))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256su0" + link_name = "llvm.aarch64.neon.st1x2.v16i8.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")] - fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8); } - unsafe { _vsha256su0q_u32(w0_3, w4_7) } + _vst1q_s8_x2(b.0, b.1, a) } -#[doc = "SHA256 schedule update accelerator, second part."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "sha2")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(test, assert_instr(sha256su1))] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0") -)] -pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.crypto.sha256su1" + link_name = "llvm.aarch64.neon.st1x2.v4i16.p0" )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")] - fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) - -> uint32x4_t; + fn _vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16); } - unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } + _vst1_s16_x2(b.0, b.1, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] - fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8i16.p0" + )] + fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16); } - unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) } + _vst1q_s16_x2(b.0, b.1, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] - fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2i32.p0" + )] + fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32); } - unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) } + _vst1_s32_x2(b.0, b.1, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] - fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4i32.p0" + )] + fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32); } - unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) } + _vst1q_s32_x2(b.0, b.1, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] - fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v1i64.p0" + )] + fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64); } - unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) } + _vst1_s64_x2(b.0, b.1, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] - fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2i64.p0" + )] + fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64); } - unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) } + _vst1q_s64_x2(b.0, b.1, a) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] - fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")] + fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t); } - unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) } + _vst1_s8_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] - fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")] + fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t); } - unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) } + _vst1q_s8_x2(a, b.0, b.1) } +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftlins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] - fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")] + fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t); } - unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) } + _vst1_s16_x2(a, b.0, b.1) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] - fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")] + fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t); } - unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) } + _vst1q_s16_x2(a, b.0, b.1) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] - fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")] + fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t); } - unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) } + _vst1_s32_x2(a, b.0, b.1) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] - fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")] + fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t); } - unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) } + _vst1q_s32_x2(a, b.0, b.1) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] - fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")] + fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t); } - unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) } + _vst1_s64_x2(a, b.0, b.1) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] - fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")] + fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t); } - unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) } + _vst1q_s64_x2(a, b.0, b.1) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] - fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i8.p0" + )] + fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); } - unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) } + _vst1_s8_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] - fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v16i8.p0" + )] + fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); } - unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) } + _vst1q_s8_x3(b.0, b.1, b.2, a) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[rustc_legacy_const_generics(2)] -fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] - fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i16.p0" + )] + fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); } - unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) } + _vst1_s16_x3(b.0, b.1, b.2, a) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshl_n_s8(a: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdup_n_s8(N as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i16.p0" + )] + fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x3(b.0, b.1, b.2, a) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdupq_n_s8(N as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i32.p0" + )] + fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x3(b.0, b.1, b.2, a) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshl_n_s16(a: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdup_n_s16(N as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i32.p0" + )] + fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x3(b.0, b.1, b.2, a) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdupq_n_s16(N as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v1i64.p0" + )] + fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x3(b.0, b.1, b.2, a) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshl_n_s32(a: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdup_n_s32(N as _)) } +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i64.p0" + )] + fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x3(b.0, b.1, b.2, a) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdupq_n_s32(N as _)) } +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")] + fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); + } + _vst1_s8_x3(a, b.0, b.1, b.2) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdup_n_s64(N as _)) } +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")] + fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); + } + _vst1q_s8_x3(a, b.0, b.1, b.2) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")] + fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + } + _vst1_s16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")] + fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); + } + _vst1q_s16_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")] + fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + } + _vst1_s32_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")] + fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + } + _vst1q_s32_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")] + fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + } + _vst1_s64_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")] + fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); + } + _vst1q_s64_x3(a, b.0, b.1, b.2) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i8.p0" + )] + fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v16i8.p0" + )] + fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i16.p0" + )] + fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i16.p0" + )] + fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i32.p0" + )] + fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i32.p0" + )] + fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v1i64.p0" + )] + fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i64.p0" + )] + fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x4(b.0, b.1, b.2, b.3, a) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")] + fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); + } + _vst1_s8_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")] + fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); + } + _vst1q_s8_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")] + fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); + } + _vst1_s16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")] + fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); + } + _vst1q_s16_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")] + fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); + } + _vst1_s32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")] + fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t); + } + _vst1q_s32_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")] + fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); + } + _vst1_s64_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")] + fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); + } + _vst1q_s64_x4(a, b.0, b.1, b.2, b.3) +} +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60129,21 +60002,21 @@ pub fn vshl_n_s64(a: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdupq_n_s64(N as _)) } +pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60152,21 +60025,21 @@ pub fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdup_n_u8(N as _)) } +pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60175,21 +60048,21 @@ pub fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { simd_shl(a, vdupq_n_u8(N as _)) } +pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60198,21 +60071,21 @@ pub fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdup_n_u16(N as _)) } +pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60221,21 +60094,21 @@ pub fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { simd_shl(a, vdupq_n_u16(N as _)) } +pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60244,21 +60117,21 @@ pub fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdup_n_u32(N as _)) } +pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60267,21 +60140,21 @@ pub fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - unsafe { simd_shl(a, vdupq_n_u32(N as _)) } +pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60290,21 +60163,21 @@ pub fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdup_n_u64(N as _)) } +pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) } -#[doc = "Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shl, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60313,19 +60186,20 @@ pub fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - unsafe { simd_shl(a, vdupq_n_u64(N as _)) } +pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60335,26 +60209,20 @@ pub fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v8i8" - )] - fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vshl_s8(a, b) } +pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60364,26 +60232,20 @@ pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v16i8" - )] - fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } - unsafe { _vshlq_s8(a, b) } +pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60393,26 +60255,20 @@ pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v4i16" - )] - fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } - unsafe { _vshl_s16(a, b) } +pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60422,26 +60278,20 @@ pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v8i16" - )] - fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } - unsafe { _vshlq_s16(a, b) } +pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { + vst1_s32_x2(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60451,26 +60301,20 @@ pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v2i32" - )] - fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } - unsafe { _vshl_s32(a, b) } +pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { + vst1_s32_x3(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60480,26 +60324,20 @@ pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v4i32" - )] - fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } - unsafe { _vshlq_s32(a, b) } +pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { + vst1_s32_x4(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60509,26 +60347,20 @@ pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v1i64" - )] - fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } - unsafe { _vshl_s64(a, b) } +pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { + vst1q_s32_x2(transmute(a), transmute(b)) } -#[doc = "Signed Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60538,26 +60370,20 @@ pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.sshl.v2i64" - )] - fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } - unsafe { _vshlq_s64(a, b) } +pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { + vst1q_s32_x3(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60567,26 +60393,20 @@ pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v8i8" - )] - fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } - unsafe { _vshl_u8(a, b) } +pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { + vst1q_s32_x4(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60596,26 +60416,20 @@ pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v16i8" - )] - fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } - unsafe { _vshlq_u8(a, b) } +pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60625,26 +60439,20 @@ pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v4i16" - )] - fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } - unsafe { _vshl_u16(a, b) } +pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60654,26 +60462,20 @@ pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v8i16" - )] - fn _vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } - unsafe { _vshlq_u16(a, b) } +pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60683,26 +60485,20 @@ pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v2i32" - )] - fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } - unsafe { _vshl_u32(a, b) } +pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60712,26 +60508,20 @@ pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v4i32" - )] - fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } - unsafe { _vshlq_u32(a, b) } +pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60741,26 +60531,20 @@ pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v1i64" - )] - fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } - unsafe { _vshl_u64(a, b) } +pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) } -#[doc = "Unsigned Shift left"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushl) + assert_instr(st1) )] #[cfg_attr( not(target_arch = "arm"), @@ -60770,28 +60554,21 @@ pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ushl.v2i64" - )] - fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } - unsafe { _vshlq_u64(a, b) } +pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60800,21 +60577,21 @@ pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_s16(a: int16x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 16); - unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) } +pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60823,21 +60600,21 @@ pub fn vshll_n_s16(a: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_s32(a: int32x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 32); - unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) } +pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60846,21 +60623,21 @@ pub fn vshll_n_s32(a: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_s8(a: int8x8_t) -> int16x8_t { - static_assert!(N >= 0 && N <= 8); - unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) } +pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60869,21 +60646,21 @@ pub fn vshll_n_s8(a: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 16); - unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) } +pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60892,21 +60669,21 @@ pub fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 32); - unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) } +pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) } -#[doc = "Signed shift left long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushll, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60915,21 +60692,21 @@ pub fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { - static_assert!(N >= 0 && N <= 8); - unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) } +pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60938,22 +60715,21 @@ pub fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshr_n_s8(a: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { 7 } else { N }; - unsafe { simd_shr(a, vdup_n_s8(n as _)) } +pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60962,22 +60738,21 @@ pub fn vshr_n_s8(a: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { 7 } else { N }; - unsafe { simd_shr(a, vdupq_n_s8(n as _)) } +pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -60986,22 +60761,21 @@ pub fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshr_n_s16(a: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { 15 } else { N }; - unsafe { simd_shr(a, vdup_n_s16(n as _)) } +pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61010,22 +60784,21 @@ pub fn vshr_n_s16(a: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { 15 } else { N }; - unsafe { simd_shr(a, vdupq_n_s16(n as _)) } +pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"] +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) + assert_instr(st1) )] -#[rustc_legacy_const_generics(1)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61034,318 +60807,198 @@ pub fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshr_n_s32(a: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { 31 } else { N }; - unsafe { simd_shr(a, vdup_n_s32(n as _)) } +pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { 31 } else { N }; - unsafe { simd_shr(a, vdupq_n_s32(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] + fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); + } + _vst1_v1i64(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_s64(a: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { 63 } else { N }; - unsafe { simd_shr(a, vdup_n_s64(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] + fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); + } + _vst1_v2f32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sshr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { 63 } else { N }; - unsafe { simd_shr(a, vdupq_n_s64(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] + fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); + } + _vst1_v2i32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { - return vdup_n_u8(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u8(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] + fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); + } + _vst1_v4i16(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { - return vdupq_n_u8(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u8(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] + fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); + } + _vst1_v8i8(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { - return vdup_n_u16(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u16(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] + fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); + } + _vst1q_v16i8(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { - return vdupq_n_u16(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u16(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] + fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); + } + _vst1q_v2i64(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { - return vdup_n_u32(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u32(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")] + fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); + } + _vst1q_v4f32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { - return vdupq_n_u32(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u32(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] + fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); + } + _vst1q_v4i32(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { - return vdup_n_u64(0); - } else { - N - }; - unsafe { simd_shr(a, vdup_n_u64(n as _)) } +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] + fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); + } + _vst1q_v8i16(addr, val, ALIGN) } -#[doc = "Shift right"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ushr, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { - return vdupq_n_u64(0); - } else { - N - }; - unsafe { simd_shr(a, vdupq_n_u64(n as _)) } +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] + fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); + } + _vst1_v4f16(addr, val, align) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"] +#[doc = "Store multiple single-element structures from one, two, three, or four registers."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[target_feature(enable = "neon,fp16")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] +unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] + fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); + } + _vst1q_v8f16(addr, val, align) +} +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) + assert_instr(nop, LANE = 0) )] -#[rustc_legacy_const_generics(1)] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61354,397 +61007,686 @@ pub fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) } +pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f16.p0" + )] + fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); + } + _vst2_f16(b.0, b.1, a as _) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) } +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8f16.p0" + )] + fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); + } + _vst2q_f16(b.0, b.1, a as _) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v4f16")] + fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); + } + _vst2_f16(a as _, b.0, b.1, 2) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")] + fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); + } + _vst2q_f16(a as _, b.0, b.1, 2) } -#[doc = "Shift right narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(shrn, N = 2) -)] -#[rustc_legacy_const_generics(1)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) } -} -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - vshiftlins_v8i8::(a, b) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - vshiftlins_v16i8::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 4); - vshiftlins_v4i16::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 4); - vshiftlins_v8i16::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 0 && N <= 31); - vshiftlins_v2i32::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 31); - vshiftlins_v4i32::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 0 && N <= 63); - vshiftlins_v1i64::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 63); - vshiftlins_v2i64::(a, b) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")] + fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + } + _vst2_f32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")] + fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32); + } + _vst2q_f32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")] + fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); + } + _vst2_s8(a as _, b.0, b.1, 1) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")] + fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); + } + _vst2q_s8(a as _, b.0, b.1, 1) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vshiftlins_v2i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")] + fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32); + } + _vst2_s16(a as _, b.0, b.1, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 31); - unsafe { transmute(vshiftlins_v4i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")] + fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32); + } + _vst2q_s16(a as _, b.0, b.1, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vshiftlins_v1i64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")] + fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32); + } + _vst2_s32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 63); - unsafe { transmute(vshiftlins_v2i64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")] + fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32); + } + _vst2q_s32(a as _, b.0, b.1, 4) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f16.p0" + )] + fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 3); - unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8f16.p0" + )] + fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")] + fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); + } + _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Shift Left and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 4); - unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")] + fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); + } + _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) -)] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshr_n_s8::(b)) } +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2f32.p0" + )] + fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f32(b.0, b.1, LANE as i64, a as _) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4f32.p0" + )] + fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8i8.p0" + )] + fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s8(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i16.p0" + )] + fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v8i16.p0" + )] + fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2i32.p0" + )] + fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i32.p0" + )] + fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")] + fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + } + _vst2_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")] + fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); + } + _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")] + fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); + } + _vst2_lane_s8(a as _, b.0, b.1, LANE, 1) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")] + fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32); + } + _vst2_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")] + fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32); + } + _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")] + fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); + } + _vst2_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")] + fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32); + } + _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61755,19 +61697,21 @@ pub fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshrq_n_s8::(b)) } +pub unsafe fn vst2_lane_u8(a: *mut u8, b: uint8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61778,19 +61722,21 @@ pub fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshr_n_s16::(b)) } +pub unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61801,19 +61747,21 @@ pub fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshrq_n_s16::(b)) } +pub unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61824,19 +61772,21 @@ pub fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshr_n_s32::(b)) } +pub unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61847,19 +61797,21 @@ pub fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshrq_n_s32::(b)) } +pub unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2q_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61870,19 +61822,21 @@ pub fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshr_n_s64::(b)) } +pub unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Signed shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61893,19 +61847,21 @@ pub fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshrq_n_s64::(b)) } +pub unsafe fn vst2_lane_p16(a: *mut p16, b: poly16x4x2_t) { + static_assert_uimm_bits!(LANE, 2); + vst2_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -61916,21 +61872,22 @@ pub fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshr_n_u8::(b)) } +pub unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t) { + static_assert_uimm_bits!(LANE, 3); + vst2q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(nop) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61939,21 +61896,45 @@ pub fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - unsafe { simd_add(a, vshrq_n_u8::(b)) } +pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { + vst2_s64(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(nop) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61962,21 +61943,21 @@ pub fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshr_n_u16::(b)) } +pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { + vst2_s64(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -61985,21 +61966,21 @@ pub fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - unsafe { simd_add(a, vshrq_n_u16::(b)) } +pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) { + vst2_s8(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62008,21 +61989,21 @@ pub fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshr_n_u32::(b)) } +pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { + vst2q_s8(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62031,21 +62012,21 @@ pub fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - unsafe { simd_add(a, vshrq_n_u32::(b)) } +pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) { + vst2_s16(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62054,21 +62035,21 @@ pub fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshr_n_u64::(b)) } +pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { + vst2q_s16(transmute(a), transmute(b)) } -#[doc = "Unsigned shift right and accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usra, N = 2) + assert_instr(st2) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -62077,1129 +62058,849 @@ pub fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - unsafe { simd_add(a, vshrq_n_u64::(b)) } -} -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(1 <= N && N <= 8); - vshiftrins_v8i8::(a, b) +pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { + vst2_s32(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(1 <= N && N <= 8); - vshiftrins_v16i8::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { + vst2q_s32(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(1 <= N && N <= 16); - vshiftrins_v4i16::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) { + vst2_s8(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(1 <= N && N <= 16); - vshiftrins_v8i16::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { + vst2q_s8(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(1 <= N && N <= 32); - vshiftrins_v2i32::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) { + vst2_s16(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(1 <= N && N <= 32); - vshiftrins_v4i32::(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { + vst2q_s16(transmute(a), transmute(b)) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(1 <= N && N <= 64); - vshiftrins_v1i64::(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f16")] + fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); + } + _vst3_f16(a as _, b.0, b.1, b.2, 2) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(1 <= N && N <= 64); - vshiftrins_v2i64::(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")] + fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32); + } + _vst3q_f16(a as _, b.0, b.1, b.2, 2) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f16.p0" + )] + fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); + } + _vst3_f16(b.0, b.1, b.2, a as _) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8f16.p0" + )] + fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); + } + _vst3q_f16(b.0, b.1, b.2, a as _) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(1 <= N && N <= 32); - unsafe { transmute(vshiftrins_v2i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(1 <= N && N <= 32); - unsafe { transmute(vshiftrins_v4i32::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(1 <= N && N <= 64); - unsafe { transmute(vshiftrins_v1i64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(1 <= N && N <= 64); - unsafe { transmute(vshiftrins_v2i64::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert!(1 <= N && N <= 8); - unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } -#[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert!(1 <= N && N <= 16); - unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) { - vst1_v4f16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) { - vst1q_v8f16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")] - fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t); - } - _vst1_f16_x2(a, b.0, b.1) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")] - fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t); - } - _vst1q_f16_x2(a, b.0, b.1) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4f16.p0" - )] - fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16); - } - _vst1_f16_x2(b.0, b.1, a) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v8f16.p0" - )] - fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16); - } - _vst1q_f16_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")] - fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t); - } - _vst1_f16_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")] - fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t); - } - _vst1q_f16_x3(a, b.0, b.1, b.2) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4f16.p0" - )] - fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16); - } - _vst1_f16_x3(b.0, b.1, b.2, a) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v8f16.p0" - )] - fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16); - } - _vst1q_f16_x3(b.0, b.1, b.2, a) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")] - fn _vst1_f16_x4( - ptr: *mut f16, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f16")] + fn _vst3_lane_f16( + ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, - d: float16x4_t, + n: i32, + size: i32, ); } - _vst1_f16_x4(a, b.0, b.1, b.2, b.3) + _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")] - fn _vst1q_f16_x4( - ptr: *mut f16, + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")] + fn _vst3q_lane_f16( + ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, - d: float16x8_t, + n: i32, + size: i32, ); } - _vst1q_f16_x4(a, b.0, b.1, b.2, b.3) + _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) { +pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4f16.p0" + link_name = "llvm.aarch64.neon.st3lane.v4f16.p0" )] - fn _vst1_f16_x4( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - ptr: *mut f16, - ); + fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); } - _vst1_f16_x4(b.0, b.1, b.2, b.3, a) + _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { +pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v8f16.p0" + link_name = "llvm.aarch64.neon.st3lane.v8f16.p0" )] - fn _vst1q_f16_x4( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - ptr: *mut f16, - ); + fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); } - _vst1q_f16_x4(b.0, b.1, b.2, b.3, a) + _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v2f32::(ptr as *const i8, transmute(a)) +pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")] + fn _vst3_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + n: i32, + size: i32, + ); + } + _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v4f32::(ptr as *const i8, transmute(a)) +pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")] + fn _vst3q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v8i8::(ptr as *const i8, a) +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")] + fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); + } + _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v16i8::(ptr as *const i8, a) +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")] + fn _vst3_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ); + } + _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v4i16::(ptr as *const i8, a) +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")] + fn _vst3q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v8i16::(ptr as *const i8, a) +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")] + fn _vst3_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, + ); + } + _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v2i32::(ptr as *const i8, a) +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")] + fn _vst3q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ); + } + _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v4i32::(ptr as *const i8, a) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2f32.p0" + )] + fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8); + } + _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v1i64::(ptr as *const i8, a) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4f32.p0" + )] + fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v2i64::(ptr as *const i8, a) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i8.p0" + )] + fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v8i8::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v16i8::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v4i16::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v8i16::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v2i32::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v4i32::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v1i64::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v2i64::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v8i8::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v16i8::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v4i16::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v8i16::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1_v1i64::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - const ALIGN: i32 = crate::mem::align_of::() as i32; - vst1q_v2i64::(ptr as *const i8, transmute(a)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")] - fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t); - } - _vst1_f32_x2(a, b.0, b.1) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")] - fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t); - } - _vst1q_f32_x2(a, b.0, b.1) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2f32.p0" - )] - fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32); - } - _vst1_f32_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4f32.p0" - )] - fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32); - } - _vst1q_f32_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2f32.p0" + link_name = "llvm.aarch64.neon.st3lane.v4i16.p0" )] - fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); + fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); } - _vst1_f32_x3(b.0, b.1, b.2, a) + _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4f32.p0" + link_name = "llvm.aarch64.neon.st3lane.v8i16.p0" )] - fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); - } - _vst1q_f32_x3(b.0, b.1, b.2, a) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")] - fn _vst1_f32_x4( - ptr: *mut f32, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - ); - } - _vst1_f32_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")] - fn _vst1q_f32_x4( - ptr: *mut f32, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - ); + fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); } - _vst1q_f32_x4(a, b.0, b.1, b.2, b.3) + _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2f32.p0" + link_name = "llvm.aarch64.neon.st3lane.v2i32.p0" )] - fn _vst1_f32_x4( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - ptr: *mut f32, - ); + fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); } - _vst1_f32_x4(b.0, b.1, b.2, b.3, a) + _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[cfg_attr(test, assert_instr(st1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4f32.p0" + link_name = "llvm.aarch64.neon.st3lane.v4i32.p0" )] - fn _vst1q_f32_x4( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - ptr: *mut f32, - ); + fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); } - _vst1q_f32_x4(b.0, b.1, b.2, b.3, a) + _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1_lane_f16(a: *mut f16, b: float16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t) { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); + vst3_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63210,21 +62911,21 @@ pub unsafe fn vst1q_lane_f16(a: *mut f16, b: float16x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63235,21 +62936,21 @@ pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_lane_u16(a: *mut u16, b: uint16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63260,21 +62961,21 @@ pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63285,21 +62986,21 @@ pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3q_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63310,21 +63011,21 @@ pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63335,21 +63036,21 @@ pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_lane_p16(a: *mut p16, b: poly16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3, LANE = 0) )] #[rustc_legacy_const_generics(2)] #[cfg_attr( @@ -63360,23 +63061,22 @@ pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(nop) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63385,73 +63085,45 @@ pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + core::ptr::write_unaligned(a.cast(), b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(nop) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63460,23 +63132,21 @@ pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63485,23 +63155,21 @@ pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63510,23 +63178,21 @@ pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63535,23 +63201,21 @@ pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63560,23 +63224,21 @@ pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63585,23 +63247,21 @@ pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { + vst3_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63610,23 +63270,21 @@ pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { + vst3q_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63635,23 +63293,21 @@ pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63660,48 +63316,21 @@ pub unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63710,23 +63339,21 @@ pub unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"] +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(st3) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -63735,1025 +63362,878 @@ pub unsafe fn vst1_lane_p64(a: *mut p64, b: poly64x1_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { - vst1_s64_x2(transmute(a), transmute(b)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { - vst1_s64_x3(transmute(a), transmute(b)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { - vst1_s64_x4(transmute(a), transmute(b)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { - vst1q_s64_x2(transmute(a), transmute(b)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { - vst1q_s64_x3(transmute(a), transmute(b)) -} -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { - vst1q_s64_x4(transmute(a), transmute(b)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")] + fn _vst4_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + size: i32, + ); + } + _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v8i8.p0" - )] - fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")] + fn _vst4q_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + size: i32, + ); } - _vst1_s8_x2(b.0, b.1, a) + _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v16i8.p0" + link_name = "llvm.aarch64.neon.st4.v4f16.p0" )] - fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); } - _vst1q_s8_x2(b.0, b.1, a) + _vst4_f16(b.0, b.1, b.2, b.3, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4i16.p0" + link_name = "llvm.aarch64.neon.st4.v8f16.p0" )] - fn _vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16); + fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); } - _vst1_s16_x2(b.0, b.1, a) + _vst4q_f16(b.0, b.1, b.2, b.3, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v8i16.p0" - )] - fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16); - } - _vst1q_s16_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2i32.p0" - )] - fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32); - } - _vst1_s32_x2(b.0, b.1, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v4i32.p0" - )] - fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")] + fn _vst4_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + size: i32, + ); } - _vst1q_s32_x2(b.0, b.1, a) + _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v1i64.p0" - )] - fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")] + fn _vst4q_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + size: i32, + ); } - _vst1_s64_x2(b.0, b.1, a) + _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x2.v2i64.p0" - )] - fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")] + fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32); } - _vst1q_s64_x2(b.0, b.1, a) + _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")] - fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")] + fn _vst4q_s8( + ptr: *mut i8, + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: int8x16_t, + size: i32, + ); } - _vst1_s8_x2(a, b.0, b.1) + _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")] - fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")] + fn _vst4_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + size: i32, + ); } - _vst1q_s8_x2(a, b.0, b.1) + _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")] - fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i16")] + fn _vst4q_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + size: i32, + ); } - _vst1_s16_x2(a, b.0, b.1) + _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")] - fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")] + fn _vst4_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + size: i32, + ); } - _vst1q_s16_x2(a, b.0, b.1) + _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")] - fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")] + fn _vst4q_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + size: i32, + ); } - _vst1_s32_x2(a, b.0, b.1) + _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")] - fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t); - } - _vst1q_s32_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { + crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")] - fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t); - } - _vst1_s64_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { + crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")] - fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t); - } - _vst1q_s64_x2(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { + crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v8i8.p0" - )] - fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); - } - _vst1_s8_x3(b.0, b.1, b.2, a) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v16i8.p0" - )] - fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); - } - _vst1q_s8_x3(b.0, b.1, b.2, a) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { + crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4i16.p0" - )] - fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); - } - _vst1_s16_x3(b.0, b.1, b.2, a) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { + crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v8i16.p0" - )] - fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); - } - _vst1q_s16_x3(b.0, b.1, b.2, a) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2i32.p0" - )] - fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")] + fn _vst4_lane_f16( + ptr: *mut i8, + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i32, + size: i32, + ); } - _vst1_s32_x3(b.0, b.1, b.2, a) + _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v4i32.p0" - )] - fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8f16")] + fn _vst4q_lane_f16( + ptr: *mut i8, + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i32, + size: i32, + ); } - _vst1q_s32_x3(b.0, b.1, b.2, a) + _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v1i64.p0" + link_name = "llvm.aarch64.neon.st4lane.v4f16.p0" )] - fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); + fn _vst4_lane_f16( + a: float16x4_t, + b: float16x4_t, + c: float16x4_t, + d: float16x4_t, + n: i64, + ptr: *mut i8, + ); } - _vst1_s64_x3(b.0, b.1, b.2, a) + _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] +#[unstable(feature = "stdarch_neon_f16", issue = "136306")] +#[cfg(not(target_arch = "arm64ec"))] +pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x3.v2i64.p0" + link_name = "llvm.aarch64.neon.st4lane.v8f16.p0" )] - fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); + fn _vst4q_lane_f16( + a: float16x8_t, + b: float16x8_t, + c: float16x8_t, + d: float16x8_t, + n: i64, + ptr: *mut i8, + ); } - _vst1q_s64_x3(b.0, b.1, b.2, a) + _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")] - fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")] + fn _vst4_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ); } - _vst1_s8_x3(a, b.0, b.1, b.2) + _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")] - fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); - } - _vst1q_s8_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")] - fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")] + fn _vst4q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ); } - _vst1_s16_x3(a, b.0, b.1, b.2) + _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")] - fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")] + fn _vst4_lane_s8( + ptr: *mut i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ); } - _vst1q_s16_x3(a, b.0, b.1, b.2) + _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")] - fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")] + fn _vst4_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ); } - _vst1_s32_x3(a, b.0, b.1, b.2) + _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")] - fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")] + fn _vst4q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ); } - _vst1q_s32_x3(a, b.0, b.1, b.2) + _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")] - fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")] + fn _vst4_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: i32, + ); } - _vst1_s64_x3(a, b.0, b.1, b.2) + _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")] - fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); - } - _vst1q_s64_x3(a, b.0, b.1, b.2) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v8i8.p0" - )] - fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")] + fn _vst4q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ); } - _vst1_s8_x4(b.0, b.1, b.2, b.3, a) + _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v16i8.p0" + link_name = "llvm.aarch64.neon.st4lane.v2f32.p0" )] - fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + fn _vst4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *mut i8, + ); } - _vst1q_s8_x4(b.0, b.1, b.2, b.3, a) + _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4i16.p0" + link_name = "llvm.aarch64.neon.st4lane.v4f32.p0" )] - fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); + fn _vst4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *mut i8, + ); } - _vst1_s16_x4(b.0, b.1, b.2, b.3, a) + _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v8i16.p0" + link_name = "llvm.aarch64.neon.st4lane.v8i8.p0" )] - fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); } - _vst1q_s16_x4(b.0, b.1, b.2, b.3, a) + _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2i32.p0" + link_name = "llvm.aarch64.neon.st4lane.v4i16.p0" )] - fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + fn _vst4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *mut i8, + ); } - _vst1_s32_x4(b.0, b.1, b.2, b.3, a) + _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v4i32.p0" + link_name = "llvm.aarch64.neon.st4lane.v8i16.p0" )] - fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + fn _vst4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *mut i8, + ); } - _vst1q_s32_x4(b.0, b.1, b.2, b.3, a) + _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v1i64.p0" + link_name = "llvm.aarch64.neon.st4lane.v2i32.p0" )] - fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + fn _vst4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *mut i8, + ); } - _vst1_s64_x4(b.0, b.1, b.2, b.3, a) + _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] -pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); unsafe extern "unadjusted" { #[cfg_attr( any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st1x4.v2i64.p0" + link_name = "llvm.aarch64.neon.st4lane.v4i32.p0" )] - fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); - } - _vst1q_s64_x4(b.0, b.1, b.2, b.3, a) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")] - fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); - } - _vst1_s8_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")] - fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); - } - _vst1q_s8_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")] - fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); - } - _vst1_s16_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")] - fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); - } - _vst1q_s16_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")] - fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); - } - _vst1_s32_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")] - fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t); - } - _vst1q_s32_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")] - fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); - } - _vst1_s64_x4(a, b.0, b.1, b.2, b.3) -} -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst1))] -pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")] - fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); + fn _vst4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *mut i8, + ); } - _vst1q_s64_x4(a, b.0, b.1, b.2, b.3) + _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64762,21 +64242,23 @@ pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { - vst1_s8_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64785,21 +64267,23 @@ pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { - vst1_s8_x3(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_u16(a: *mut u16, b: uint16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64808,21 +64292,23 @@ pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { - vst1_s8_x4(transmute(a), transmute(b)) +pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64831,21 +64317,23 @@ pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { - vst1q_s8_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64854,21 +64342,23 @@ pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { - vst1q_s8_x3(transmute(a), transmute(b)) +pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4q_lane_s32::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64877,21 +64367,23 @@ pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { - vst1q_s8_x4(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64900,21 +64392,23 @@ pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { - vst1_s16_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4, LANE = 0) )] +#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -64923,20 +64417,21 @@ pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { - vst1_s16_x3(transmute(a), transmute(b)) +pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -64946,20 +64441,44 @@ pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { - vst1_s16_x4(transmute(a), transmute(b)) +pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { + vst4_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + core::ptr::write_unaligned(a.cast(), b) +} +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -64969,20 +64488,20 @@ pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { - vst1q_s16_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) { + vst4_s64(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -64992,20 +64511,20 @@ pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { - vst1q_s16_x3(transmute(a), transmute(b)) +pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) { + vst4_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65015,20 +64534,20 @@ pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { - vst1q_s16_x4(transmute(a), transmute(b)) +pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) { + vst4q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65038,20 +64557,20 @@ pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { - vst1_s32_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) { + vst4_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65061,20 +64580,20 @@ pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { - vst1_s32_x3(transmute(a), transmute(b)) +pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) { + vst4q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65084,20 +64603,20 @@ pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { - vst1_s32_x4(transmute(a), transmute(b)) +pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) { + vst4_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65107,20 +64626,20 @@ pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { - vst1q_s32_x2(transmute(a), transmute(b)) +pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) { + vst4q_s32(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65130,20 +64649,20 @@ pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { - vst1q_s32_x3(transmute(a), transmute(b)) +pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) { + vst4_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65153,20 +64672,20 @@ pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { - vst1q_s32_x4(transmute(a), transmute(b)) +pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) { + vst4q_s8(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65176,20 +64695,20 @@ pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { - vst1_s64_x2(transmute(a), transmute(b)) +pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) { + vst4_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"] +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(st4) )] #[cfg_attr( not(target_arch = "arm"), @@ -65199,20 +64718,20 @@ pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { - vst1_s64_x3(transmute(a), transmute(b)) +pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { + vst4q_s16(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"] +#[doc = "Store SIMD&FP register (immediate offset)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"] #[doc = "## Safety"] #[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(nop) )] #[cfg_attr( not(target_arch = "arm"), @@ -65222,20 +64741,62 @@ pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { - vst1_s64_x4(transmute(a), transmute(b)) +pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { + *a = b } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[target_feature(enable = "neon,fp16")] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "stdarch_neon_fp16", since = "1.94.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +#[cfg(not(target_arch = "arm64ec"))] +pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { + unsafe { simd_sub(a, b) } +} +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(fsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65245,20 +64806,18 @@ pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { - vst1q_s64_x2(transmute(a), transmute(b)) +pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(fsub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65268,20 +64827,18 @@ pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { - vst1q_s64_x3(transmute(a), transmute(b)) +pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65291,20 +64848,18 @@ pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { - vst1q_s64_x4(transmute(a), transmute(b)) +pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65314,20 +64869,18 @@ pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { - vst1_s8_x2(transmute(a), transmute(b)) +pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65337,20 +64890,18 @@ pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { - vst1_s8_x3(transmute(a), transmute(b)) +pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65360,20 +64911,18 @@ pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { - vst1_s8_x4(transmute(a), transmute(b)) +pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65383,20 +64932,18 @@ pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { - vst1q_s8_x2(transmute(a), transmute(b)) +pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65406,20 +64953,18 @@ pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { - vst1q_s8_x3(transmute(a), transmute(b)) +pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65429,20 +64974,18 @@ pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { - vst1q_s8_x4(transmute(a), transmute(b)) +pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65452,20 +64995,18 @@ pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { - vst1_s16_x2(transmute(a), transmute(b)) +pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65475,20 +65016,18 @@ pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { - vst1_s16_x3(transmute(a), transmute(b)) +pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65498,20 +65037,18 @@ pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { - vst1_s16_x4(transmute(a), transmute(b)) +pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65521,20 +65058,18 @@ pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { - vst1q_s16_x2(transmute(a), transmute(b)) +pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65544,20 +65079,18 @@ pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { - vst1q_s16_x3(transmute(a), transmute(b)) +pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + unsafe { simd_sub(a, b) } } -#[doc = "Store multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st1) + assert_instr(sub) )] #[cfg_attr( not(target_arch = "arm"), @@ -65567,198 +65100,304 @@ pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { - vst1q_s16_x4(transmute(a), transmute(b)) +pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe { simd_sub(a, b) } } +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] - fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); - } - _vst1_v1i64(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { simd_sub(a, b) } } +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] - fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); - } - _vst1_v2f32(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { simd_sub(a, b) } } +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] - fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); - } - _vst1_v2i32(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { simd_sub(a, b) } } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] - fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); - } - _vst1_v4i16(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let d = vsubhn_s16(b, c); + vcombine_s8(a, d) } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] - fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); - } - _vst1_v8i8(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let d = vsubhn_s32(b, c); + vcombine_s16(a, d) } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] - fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); - } - _vst1q_v16i8(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let d = vsubhn_s64(b, c); + vcombine_s32(a, d) } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] - fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); - } - _vst1q_v2i64(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let d = vsubhn_u16(b, c); + vcombine_u8(a, d) } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")] - fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); - } - _vst1q_v4f32(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let d = vsubhn_u32(b, c); + vcombine_u16(a, d) } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] - fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); - } - _vst1q_v4i32(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(subhn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let d = vsubhn_u64(b, c); + vcombine_u32(a, d) } +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] -#[rustc_legacy_const_generics(2)] -unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] - fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); - } - _vst1q_v8i16(addr, val, ALIGN) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"] #[inline] -#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")] - fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32); - } - _vst1_v4f16(addr, val, align) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + let c: i32x4 = i32x4::new(16, 16, 16, 16); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"] #[inline] -#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[target_feature(enable = "neon,fp16")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")] - fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32); - } - _vst1q_v8f16(addr, val, align) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + let c: i64x2 = i64x2::new(32, 32); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple single-element structures from one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop, LANE = 0) + assert_instr(subhn) )] -#[rustc_legacy_const_generics(2)] #[cfg_attr( not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0") @@ -65767,4262 +65406,1071 @@ unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4f16.p0" - )] - fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8); - } - _vst2_f16(b.0, b.1, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let c: u32x4 = u32x4::new(16, 16, 16, 16); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v8f16.p0" - )] - fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8); - } - _vst2q_f16(b.0, b.1, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(subhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let c: u64x2 = u64x2::new(32, 32); + unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v4f16")] - fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + unsafe { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) } - _vst2_f16(a as _, b.0, b.1, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")] - fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + unsafe { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) } - _vst2q_f16(a as _, b.0, b.1, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + unsafe { + let c: int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st2))] -pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + unsafe { simd_sub(a, simd_cast(b)) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + unsafe { simd_sub(a, simd_cast(b)) } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_u32_u8(c); + let c = vdup_lane_u32::(c); + vusdot_s32(a, vreinterpret_u8_u32(c), b) +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_u32_u8(c); + let c = vdupq_lane_u32::(c); + vusdotq_s32(a, vreinterpretq_u8_u32(c), b) +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 3) +)] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vsudot_laneq_s32(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_u32_u8(c); + let c = vdup_laneq_u32::(c); + vusdot_s32(a, vreinterpret_u8_u32(c), b) +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"] #[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(sudot, LANE = 3) +)] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vsudotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_u32_u8(c); + let c = vdupq_laneq_u32::(c); + vusdotq_s32(a, vreinterpretq_u8_u32(c), b) +} +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"] +#[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")] - fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")] + fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; } - _vst2_f32(a as _, b.0, b.1, 4) + unsafe { _vtbl1(a, b) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")] - fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32); - } - _vst2q_f32(a as _, b.0, b.1, 4) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vtbl1(a, b) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")] - fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); - } - _vst2_s8(a as _, b.0, b.1, 1) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbl1(transmute(a), transmute(b))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")] - fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); - } - _vst2q_s8(a as _, b.0, b.1, 1) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbl1(transmute(a), transmute(b))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")] - fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")] + fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; } - _vst2_s16(a as _, b.0, b.1, 2) + unsafe { _vtbl2(a, b, c) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")] - fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32); - } - _vst2q_s16(a as _, b.0, b.1, 2) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vtbl2(a.0, a.1, b) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")] - fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32); - } - _vst2_s32(a as _, b.0, b.1, 4) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] #[inline] +#[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst2))] -pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")] - fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32); - } - _vst2q_s32(a as _, b.0, b.1, 4) +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4f16.p0" - )] - fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")] + fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; } - _vst2_lane_f16(b.0, b.1, LANE as i64, a as _) + unsafe { _vtbl3(a, b, c, d) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v8f16.p0" - )] - fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _) +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + vtbl3(a.0, a.1, a.2, b) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2_lane_f16(a: *mut f16, b: float16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")] - fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32); +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) } - _vst2_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst2q_lane_f16(a: *mut f16, b: float16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")] - fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32); +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) } - _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2f32.p0" - )] - fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")] + fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; } - _vst2_lane_f32(b.0, b.1, LANE as i64, a as _) + unsafe { _vtbl4(a, b, c, d, e) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4f32.p0" - )] - fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _) +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + vtbl4(a.0, a.1, a.2, a.3, b) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v8i8.p0" - )] - fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8); +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) } - _vst2_lane_s8(b.0, b.1, LANE as i64, a as _) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] #[inline] #[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4i16.p0" - )] - fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbl))] +pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) } - _vst2_lane_s16(b.0, b.1, LANE as i64, a as _) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v8i16.p0" - )] - fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")] + fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; } - _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _) + unsafe { _vtbx1(a, b, c) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v2i32.p0" - )] - fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); - } - _vst2_lane_s32(b.0, b.1, LANE as i64, a as _) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + vtbx1(a, b, c) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2lane.v4i32.p0" - )] - fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); - } - _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] #[inline] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"] +#[inline] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")] - fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")] + fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; } - _vst2_lane_f32(a as _, b.0, b.1, LANE, 4) + unsafe { _vtbx2(a, b, c, d) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] #[inline] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + vtbx2(a, b.0, b.1, c) +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[inline] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")] - fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) } - _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")] - fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) } - _vst2_lane_s8(a as _, b.0, b.1, LANE, 1) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")] - fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")] + fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; } - _vst2_lane_s16(a as _, b.0, b.1, LANE, 2) + unsafe { _vtbx3(a, b, c, d, e) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] #[inline] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + vtbx3(a, b.0, b.1, b.2, c) +} +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[inline] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")] - fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) } - _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")] - fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) } - _vst2_lane_s32(a as _, b.0, b.1, LANE, 4) } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr(test, assert_instr(vtbx))] +fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")] - fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")] + fn _vtbx4( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + e: int8x8_t, + f: int8x8_t, + ) -> int8x8_t; } - _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4) + unsafe { _vtbx4(a, b, c, d, e, f) } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_u8(a: *mut u8, b: uint8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2_lane_s8::(transmute(a), transmute(b)) +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + unsafe { + vtbx4( + a, + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + ) + } } -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2q_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_lane_p16(a: *mut p16, b: poly16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { - vst2_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { - vst2_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) { - vst2_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { - vst2q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) { - vst2_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { - vst2q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { - vst2_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { - vst2q_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) { - vst2_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { - vst2q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) { - vst2_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 2-element structures from two registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { - vst2q_s16(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + let mut b: int8x8x4_t = b; + unsafe { + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); + b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); + b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); + b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int8x8_t = vtbx4( + a, + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + c, + ); + simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + } } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f16")] - fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32); +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + unsafe { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) } - _vst3_f16(a as _, b.0, b.1, b.2, 2) } -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] +#[doc = "Extended table look-up"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")] - fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32); - } - _vst3q_f16(a as _, b.0, b.1, b.2, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v4f16.p0" - )] - fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8); - } - _vst3_f16(b.0, b.1, b.2, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v8f16.p0" - )] - fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8); - } - _vst3q_f16(b.0, b.1, b.2, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst3))] -pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st3))] -pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f16")] - fn _vst3_lane_f16( - ptr: *mut i8, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - n: i32, - size: i32, - ); - } - _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")] - fn _vst3q_lane_f16( - ptr: *mut i8, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3_lane_f16(a: *mut f16, b: float16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4f16.p0" - )] - fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8); - } - _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst3q_lane_f16(a: *mut f16, b: float16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v8f16.p0" - )] - fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")] - fn _vst3_lane_f32( - ptr: *mut i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - n: i32, - size: i32, - ); - } - _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")] - fn _vst3q_lane_f32( - ptr: *mut i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")] - fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); - } - _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")] - fn _vst3_lane_s16( - ptr: *mut i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - n: i32, - size: i32, - ); - } - _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")] - fn _vst3q_lane_s16( - ptr: *mut i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] #[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")] - fn _vst3_lane_s32( - ptr: *mut i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - n: i32, - size: i32, - ); +#[cfg_attr(test, assert_instr(vtbx))] +pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + unsafe { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) } - _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")] - fn _vst3q_lane_s32( - ptr: *mut i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - n: i32, - size: i32, - ); - } - _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2f32.p0" - )] - fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8); - } - _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4f32.p0" - )] - fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v8i8.p0" - )] - fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); - } - _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4i16.p0" - )] - fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); - } - _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v8i16.p0" - )] - fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v2i32.p0" - )] - fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); - } - _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3lane.v4i32.p0" - )] - fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); - } - _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - vst3_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_lane_u16(a: *mut u16, b: uint16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - vst3q_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_lane_p16(a: *mut p16, b: poly16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - vst3_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - vst3q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { - vst3_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { - vst3_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { - vst3_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { - vst3q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { - vst3_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) { - vst3q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { - vst3_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { - vst3q_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { - vst3_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { - vst3q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { - vst3_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 3-element structures from three registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st3) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { - vst3q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")] - fn _vst4_f16( - ptr: *mut i8, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - size: i32, - ); - } - _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")] - fn _vst4q_f16( - ptr: *mut i8, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - size: i32, - ); - } - _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4f16.p0" - )] - fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8); - } - _vst4_f16(b.0, b.1, b.2, b.3, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v8f16.p0" - )] - fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8); - } - _vst4q_f16(b.0, b.1, b.2, b.3, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")] - fn _vst4_f32( - ptr: *mut i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - size: i32, - ); - } - _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")] - fn _vst4q_f32( - ptr: *mut i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - size: i32, - ); - } - _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")] - fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32); - } - _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")] - fn _vst4q_s8( - ptr: *mut i8, - a: int8x16_t, - b: int8x16_t, - c: int8x16_t, - d: int8x16_t, - size: i32, - ); - } - _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")] - fn _vst4_s16( - ptr: *mut i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - size: i32, - ); - } - _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i16")] - fn _vst4q_s16( - ptr: *mut i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - size: i32, - ); - } - _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")] - fn _vst4_s32( - ptr: *mut i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - size: i32, - ); - } - _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vst4))] -pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")] - fn _vst4q_s32( - ptr: *mut i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - size: i32, - ); - } - _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { - crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { - crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { - crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { - crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { - crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st4))] -pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { - crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")] - fn _vst4_lane_f16( - ptr: *mut i8, - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i32, - size: i32, - ); - } - _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8f16")] - fn _vst4q_lane_f16( - ptr: *mut i8, - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4_lane_f16(a: *mut f16, b: float16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4f16.p0" - )] - fn _vst4_lane_f16( - a: float16x4_t, - b: float16x4_t, - c: float16x4_t, - d: float16x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))] -#[unstable(feature = "stdarch_neon_f16", issue = "136306")] -#[cfg(not(target_arch = "arm64ec"))] -pub unsafe fn vst4q_lane_f16(a: *mut f16, b: float16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v8f16.p0" - )] - fn _vst4q_lane_f16( - a: float16x8_t, - b: float16x8_t, - c: float16x8_t, - d: float16x8_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")] - fn _vst4_lane_f32( - ptr: *mut i8, - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i32, - size: i32, - ); - } - _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")] - fn _vst4q_lane_f32( - ptr: *mut i8, - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")] - fn _vst4_lane_s8( - ptr: *mut i8, - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - n: i32, - size: i32, - ); - } - _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")] - fn _vst4_lane_s16( - ptr: *mut i8, - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i32, - size: i32, - ); - } - _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")] - fn _vst4q_lane_s16( - ptr: *mut i8, - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")] - fn _vst4_lane_s32( - ptr: *mut i8, - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i32, - size: i32, - ); - } - _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")] - fn _vst4q_lane_s32( - ptr: *mut i8, - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i32, - size: i32, - ); - } - _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2f32.p0" - )] - fn _vst4_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - d: float32x2_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4f32.p0" - )] - fn _vst4q_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - d: float32x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v8i8.p0" - )] - fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); - } - _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4i16.p0" - )] - fn _vst4_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - d: int16x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v8i16.p0" - )] - fn _vst4q_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - d: int16x8_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v2i32.p0" - )] - fn _vst4_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - d: int32x2_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4lane.v4i32.p0" - )] - fn _vst4q_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - d: int32x4_t, - n: i64, - ptr: *mut i8, - ); - } - _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_u16(a: *mut u16, b: uint16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4q_lane_s32::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4_lane_s8::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4, LANE = 0) -)] -#[rustc_legacy_const_generics(2)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4q_lane_s16::(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { - vst4_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(not(target_arch = "arm"))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(nop))] -pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - core::ptr::write_unaligned(a.cast(), b) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) { - vst4_s64(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) { - vst4_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) { - vst4q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) { - vst4_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) { - vst4q_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) { - vst4_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) { - vst4q_s32(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) { - vst4_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) { - vst4q_s8(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) { - vst4_s16(transmute(a), transmute(b)) -} -#[doc = "Store multiple 4-element structures from four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(st4) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { - vst4q_s16(transmute(a), transmute(b)) -} -#[doc = "Store SIMD&FP register (immediate offset)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"] -#[doc = "## Safety"] -#[doc = " * Neon intrinsic unsafe"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(nop) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { - *a = b -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[target_feature(enable = "neon,fp16")] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(fsub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - unsafe { simd_sub(a, b) } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe { simd_sub(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { simd_sub(a, b) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { + unsafe { + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: float16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Subtract"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sub) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { simd_sub(a, b) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn2) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let d: int8x8_t = vsubhn_s16(b, c); - unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(trn1) )] -pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let d: int16x4_t = vsubhn_s32(b, c); - unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn2) + assert_instr(trn2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let d: int32x2_t = vsubhn_s64(b, c); - unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) } +#[cfg(not(target_arch = "arm64ec"))] +pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { + unsafe { + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: float16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(zip1) )] -pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - let d: uint8x8_t = vsubhn_u16(b, c); - unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70032,41 +66480,27 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - let d: uint16x4_t = vsubhn_u32(b, c); - unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) } +pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn2) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(zip1) )] -pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - let d: uint32x2_t = vsubhn_u64(b, c); - unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70076,41 +66510,32 @@ pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_ target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { - let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + unsafe { + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: float32x2x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(zip1) )] -pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { - let c: i32x4 = i32x4::new(16, 16, 16, 16); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } -} -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70120,19 +66545,27 @@ pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { - let c: i64x2 = i64x2::new(32, 32); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + unsafe { + let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70142,19 +66575,32 @@ pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + unsafe { + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: int32x2x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70164,19 +66610,27 @@ pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - let c: u32x4 = u32x4::new(16, 16, 16, 16); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) + } } -#[doc = "Subtract returning high narrow"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(subhn) + assert_instr(zip1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70186,19 +66640,32 @@ pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - let c: u64x2 = u64x2::new(32, 32); - unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) } +pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + unsafe { + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: uint32x2x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val + } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70208,22 +66675,27 @@ pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { +pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let c: int16x8_t = simd_cast(a); - let d: int16x8_t = simd_cast(b); - simd_sub(c, d) + let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70233,22 +66705,32 @@ pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { +pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let c: int32x4_t = simd_cast(a); - let d: int32x4_t = simd_cast(b); - simd_sub(c, d) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: float32x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Signed Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70258,22 +66740,27 @@ pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { +pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let c: int64x2_t = simd_cast(a); - let d: int64x2_t = simd_cast(b); - simd_sub(c, d) + let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70283,22 +66770,32 @@ pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { +pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let c: uint16x8_t = simd_cast(a); - let d: uint16x8_t = simd_cast(b); - simd_sub(c, d) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: int8x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70308,22 +66805,35 @@ pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { +pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let c: uint32x4_t = simd_cast(a); - let d: uint32x4_t = simd_cast(b); - simd_sub(c, d) + let a1: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } } -#[doc = "Unsigned Subtract Long"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubl) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70333,22 +66843,50 @@ pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { +pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let c: uint64x2_t = simd_cast(a); - let d: uint64x2_t = simd_cast(b); - simd_sub(c, d) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a1: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + let mut ret_val: int8x16x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70358,18 +66896,27 @@ pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + unsafe { + let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70379,18 +66926,32 @@ pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + unsafe { + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: int16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Signed Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(ssubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70400,18 +66961,27 @@ pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70421,18 +66991,32 @@ pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + unsafe { + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: int16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70442,18 +67026,27 @@ pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) + } } -#[doc = "Unsigned Subtract Wide"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usubw) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] #[cfg_attr( not(target_arch = "arm"), @@ -70463,1301 +67056,1468 @@ pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { - unsafe { simd_sub(a, simd_cast(b)) } +pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + unsafe { + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: int32x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sudot, LANE = 0) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let c: uint32x2_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, transmute(c), b) + let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sudot, LANE = 0) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint32x2_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: int32x2_t = vusdot_s32(a, transmute(c), b); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: uint8x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sudot, LANE = 0) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let c: uint32x2_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, transmute(c), b) + let a1: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sudot, LANE = 0) + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: int8x16_t = + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint32x2_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: int32x4_t = vusdotq_s32(a, transmute(c), b); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a1: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + let mut ret_val: uint8x16x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sudot, LANE = 3) + assert_instr(trn1) )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vsudot_laneq_s32(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: uint32x4_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, transmute(c), b) - } -} -#[doc = "Dot product index form with signed and unsigned integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"] -#[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(sudot, LANE = 3) + assert_instr(trn2) )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vsudotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: uint32x4_t = transmute(c); - let c: uint32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, transmute(c), b) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")] - fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } - unsafe { _vtbl1(a, b) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vtbl1(a, b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vtbl1(transmute(a), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vtbl1(transmute(a), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl1(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")] - fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - } - unsafe { _vtbl2(a, b, c) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - vtbl2(a.0, a.1, b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - let mut a: uint8x8x2_t = a; - unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - let mut a: poly8x8x2_t = a; - unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")] - fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - } - unsafe { _vtbl3(a, b, c, d) } -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"] -#[inline] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - vtbl3(a.0, a.1, a.2, b) -} -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) + let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - let mut a: uint8x8x3_t = a; - unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + unsafe { + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: uint16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) + let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - let mut a: poly8x8x3_t = a; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: uint16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")] - fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + unsafe { + let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } - unsafe { _vtbl4(a, b, c, d, e) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - vtbl4(a.0, a.1, a.2, a.3, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + unsafe { + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: uint32x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val + } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) + let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - let mut a: uint8x8x4_t = a; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: poly8x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) + let a1: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } } -#[doc = "Table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] #[inline] #[cfg(target_endian = "big")] #[target_feature(enable = "neon")] -#[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbl))] -pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - let mut a: poly8x8x4_t = a; +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]); - a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]); - a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]); - a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")] - fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a1: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + let mut ret_val: poly8x16x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } - unsafe { _vtbx1(a, b, c) } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - vtbx1(a, b, c) } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] -#[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { - unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")] - fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + let mut ret_val: poly16x4x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } - unsafe { _vtbx2(a, b, c, d) } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"] -#[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { - vtbx2(a, b.0, b.1, c) } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - unsafe { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) - } -} -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"] -#[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - let mut b: uint8x8x2_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { unsafe { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + let mut ret_val: poly16x8x2_t = transmute((a1, b1)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { - let mut b: poly8x8x2_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: int8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")] - fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + unsafe { + let c: int8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } - unsafe { _vtbx3(a, b, c, d, e) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - vtbx3(a, b.0, b.1, b.2, c) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + unsafe { + let c: int16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) + } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { unsafe { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) + let c: int16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - let mut b: uint8x8x3_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: int32x2_t = simd_and(a, b); + let d: i32x2 = i32x2::new(0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { unsafe { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) + let c: int32x4_t = simd_and(a, b); + let d: i32x4 = i32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - let mut b: poly8x8x3_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: poly8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"] #[inline] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")] - fn _vtbx4( - a: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - e: int8x8_t, - f: int8x8_t, - ) -> int8x8_t; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + unsafe { + let c: poly8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } - unsafe { _vtbx4(a, b, c, d, e, f) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { unsafe { - vtbx4( - a, - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - c, - ) + let c: poly16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"] +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { - let mut b: int8x8x4_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { unsafe { - let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: int8x8_t = vtbx4( - a, - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - c, - ); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: poly16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) + let c: uint8x8_t = simd_and(a, b); + let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { - let mut b: uint8x8x4_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { - let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: uint8x8_t = transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: uint8x16_t = simd_and(a, b); + let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"] #[inline] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) + let c: uint16x4_t = simd_and(a, b); + let d: u16x4 = u16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Extended table look-up"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"] #[inline] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon,v7")] -#[cfg(target_arch = "arm")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(test, assert_instr(vtbx))] -pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - let mut b: poly8x8x4_t = b; +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { unsafe { - let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); - b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]); - b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]); - b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]); - b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let ret_val: poly8x8_t = transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let c: uint16x8_t = simd_and(a, b); + let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"] +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"] #[inline] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] +pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + unsafe { + let c: uint32x2_t = simd_and(a, b); + let d: u32x2 = u32x2::new(0, 0); + simd_ne(c, transmute(d)) + } +} +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(cmtst) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { +pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { unsafe { - let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let c: uint32x4_t = simd_and(a, b); + let d: u32x4 = u32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"] +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] #[inline] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] +pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_s32_s8(c); + let c = vdup_lane_s32::(c); + vusdot_s32(a, b, vreinterpret_s8_s32(c)) +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[inline] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(usdot, LANE = 0) )] -#[target_feature(enable = "neon,fp16")] +#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { - unsafe { - let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) - } +pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c = vreinterpret_s32_s8(c); + let c = vdupq_lane_s32::(c); + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"] +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip1) + assert_instr(usdot, LANE = 3) )] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_s32_s8(c); + let c = vdup_laneq_s32::(c); + vusdot_s32(a, b, vreinterpret_s8_s32(c)) +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip2) + assert_instr(usdot, LANE = 3) +)] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c = vreinterpretq_s32_s8(c); + let c = vdupq_laneq_s32::(c); + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) +} +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(usdot) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { +pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] + fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t; + } + unsafe { _vusdot_s32(a, b, c) } +} +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(usdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] + fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t; + } unsafe { - let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a1, b1)) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int32x2_t = _vusdot_s32(a, b, c); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"] +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip1) + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(usdot) )] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip2) + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] + fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vusdotq_s32(a, b, c) } +} +#[doc = "Dot product vector form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg_attr( + all( + test, + any(target_arch = "aarch64", target_arch = "arm64ec"), + target_endian = "little" + ), + assert_instr(usdot) )] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + unstable(feature = "stdarch_neon_i8mm", issue = "117223") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { +pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] + fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } unsafe { - let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a1, b1)) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let c: int8x16_t = + simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let ret_val: int32x4_t = _vusdotq_s32(a, b, c); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] +#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"] #[inline] -#[target_feature(enable = "neon")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usmmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")] + fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + unsafe { _vusmmlaq_s32(a, b, c) } +} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] +#[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(zip2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a1, b1)) + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: float16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a1: int8x16_t = simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ); - let b1: int8x16_t = simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ); - transmute((a1, b1)) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: float16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71767,26 +68527,27 @@ pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { +pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71796,26 +68557,32 @@ pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { +pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: float32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71825,26 +68592,27 @@ pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { +pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71854,26 +68622,32 @@ pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { +pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: int32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71883,34 +68657,27 @@ pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { +pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a1: uint8x16_t = simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ); - let b1: uint8x16_t = simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ); - transmute((a1, b1)) + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71920,26 +68687,32 @@ pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { +pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: uint32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71949,26 +68722,27 @@ pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { +pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -71978,26 +68752,32 @@ pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { +pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: float32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72007,26 +68787,27 @@ pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { +pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72036,34 +68817,32 @@ pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { +pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a1: poly8x16_t = simd_shuffle!( - a, - b, - [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - ); - let b1: poly8x16_t = simd_shuffle!( - a, - b, - [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] - ); - transmute((a1, b1)) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: int8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72073,26 +68852,35 @@ pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { +pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); - let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); - transmute((a1, b1)) + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) } } -#[doc = "Transpose elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn1) + assert_instr(uzp1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(trn2) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72102,22 +68890,50 @@ pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { +pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + let mut ret_val: int8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72127,22 +68943,27 @@ pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { +pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let c: int8x8_t = simd_and(a, b); - let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72152,22 +68973,32 @@ pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { +pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let c: int8x16_t = simd_and(a, b); - let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: int16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72177,22 +69008,27 @@ pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { +pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let c: int16x4_t = simd_and(a, b); - let d: i16x4 = i16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72202,22 +69038,32 @@ pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { +pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let c: int16x8_t = simd_and(a, b); - let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: int16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72227,22 +69073,27 @@ pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { +pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let c: int32x2_t = simd_and(a, b); - let d: i32x2 = i32x2::new(0, 0); - simd_ne(c, transmute(d)) + let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72252,22 +69103,32 @@ pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { +pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let c: int32x4_t = simd_and(a, b); - let d: i32x4 = i32x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: int32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72277,22 +69138,27 @@ pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { +pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let c: poly8x8_t = simd_and(a, b); - let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72302,22 +69168,32 @@ pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { +pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let c: poly8x16_t = simd_and(a, b); - let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: uint8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72327,22 +69203,35 @@ pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { +pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let c: poly16x4_t = simd_and(a, b); - let d: i16x4 = i16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) } } -#[doc = "Signed compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72352,22 +69241,50 @@ pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { +pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let c: poly16x8_t = simd_and(a, b); - let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + let mut ret_val: uint8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72377,22 +69294,27 @@ pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { +pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let c: uint8x8_t = simd_and(a, b); - let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72402,22 +69324,32 @@ pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let c: uint8x16_t = simd_and(a, b); - let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: uint16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72427,22 +69359,27 @@ pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { +pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let c: uint16x4_t = simd_and(a, b); - let d: u16x4 = u16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72452,22 +69389,32 @@ pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { +pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let c: uint16x8_t = simd_and(a, b); - let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: uint16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72477,22 +69424,27 @@ pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let c: uint32x2_t = simd_and(a, b); - let d: u32x2 = u32x2::new(0, 0); - simd_ne(c, transmute(d)) + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Unsigned compare bitwise Test bits nonzero"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(cmtst) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72502,323 +69454,274 @@ pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let c: uint32x4_t = simd_and(a, b); - let d: u32x4 = u32x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: uint32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 0) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let c: int32x2_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 0) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x2_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: poly8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] #[inline] #[cfg(target_endian = "little")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 0) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - let c: int32x2_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] #[inline] #[cfg(target_endian = "big")] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 0) + assert_instr(uzp1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp2) )] -#[rustc_legacy_const_generics(3)] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); +pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint8x16_t = + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x2_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + let mut ret_val: poly8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] #[inline] #[cfg(target_endian = "little")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 3) + assert_instr(uzp1) )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let c: int32x4_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) - } -} -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 3) + assert_instr(uzp2) )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); - let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x16_t = - simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x4_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] #[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 3) + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let c: int32x4_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) } } -#[doc = "Dot product index form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] #[inline] #[cfg(target_endian = "big")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot, LANE = 3) + assert_instr(uzp1) )] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - unsafe { - let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); - let b: uint8x16_t = - simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int8x16_t = - simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); - let c: int32x4_t = transmute(c); - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c)); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Dot product vector form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"] -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] - fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t; +pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + unsafe { + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let mut ret_val: poly16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } - unsafe { _vusdot_s32(a, b, c) } } -#[doc = "Dot product vector form with unsigned and signed integers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"] +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usdot) -)] -#[cfg_attr( - not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + assert_instr(uzp1) )] -pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] - fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; - } - unsafe { _vusdotq_s32(a, b, c) } -} -#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"] -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(usmmla) + assert_instr(uzp2) )] #[cfg_attr( not(target_arch = "arm"), - unstable(feature = "stdarch_neon_i8mm", issue = "117223") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8" - )] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")] - fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; +pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) } - unsafe { _vusmmlaq_s32(a, b, c) } } #[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] #[cfg_attr( @@ -72829,35 +69732,39 @@ pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp2) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { +pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { unsafe { - let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let mut ret_val: poly16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[target_feature(enable = "neon,fp16")] #[cfg_attr( @@ -72869,19 +69776,19 @@ pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] #[cfg(not(target_arch = "arm64ec"))] -pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { +pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72890,27 +69797,34 @@ pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { unsafe { - let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: float16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72919,27 +69833,29 @@ pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] #[inline] -#[target_feature(enable = "neon")] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -72948,34 +69864,42 @@ pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] +#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") + stable(feature = "stdarch_neon_fp16", since = "1.94.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { +#[cfg(not(target_arch = "arm64ec"))] +pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { unsafe { - let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: float16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -72985,26 +69909,27 @@ pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { +pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73014,26 +69939,32 @@ pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { +pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { unsafe { - let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) + let a: float32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: float32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: float32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73043,34 +69974,27 @@ pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { +pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a0: int8x16_t = simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ); - let b0: int8x16_t = simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ); + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73080,26 +70004,32 @@ pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { +pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { unsafe { - let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: int32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: int32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: int32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73109,26 +70039,27 @@ pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { +pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73138,26 +70069,32 @@ pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { +pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { unsafe { - let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]); + let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]); + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + let mut ret_val: uint32x2x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73167,26 +70104,27 @@ pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { +pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73196,34 +70134,32 @@ pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { +pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { unsafe { - let a0: uint8x16_t = simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ); - let b0: uint8x16_t = simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ); - transmute((a0, b0)) + let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: int8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73233,26 +70169,27 @@ pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { +pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73262,26 +70199,32 @@ pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { +pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { unsafe { - let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) + let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: int16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73291,26 +70234,27 @@ pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { +pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73320,26 +70264,32 @@ pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { +pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { unsafe { - let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) + let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: uint8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73349,34 +70299,27 @@ pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { +pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let a0: poly8x16_t = simd_shuffle!( - a, - b, - [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] - ); - let b0: poly8x16_t = simd_shuffle!( - a, - b, - [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] - ); + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73386,26 +70329,32 @@ pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { +pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { unsafe { - let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) + let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: uint16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } -#[doc = "Unzip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp1) + assert_instr(zip1) )] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(uzp2) + assert_instr(zip2) )] #[cfg_attr( not(target_arch = "arm"), @@ -73415,18 +70364,20 @@ pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { +pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] #[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73435,28 +70386,33 @@ pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { +pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { unsafe { - let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: poly8x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] #[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73465,29 +70421,28 @@ pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t { all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip2) )] -#[target_feature(enable = "neon,fp16")] #[cfg_attr( not(target_arch = "arm"), - stable(feature = "stdarch_neon_fp16", since = "1.94.0") + stable(feature = "neon_intrinsics", since = "1.59.0") )] #[cfg_attr( target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -#[cfg(not(target_arch = "arm64ec"))] -pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { +pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73504,19 +70459,25 @@ pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { +pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { unsafe { - let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: poly16x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73533,19 +70494,20 @@ pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { +pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73562,19 +70524,25 @@ pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { +pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { unsafe { - let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) + let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: float32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73591,19 +70559,28 @@ pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { +pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73620,19 +70597,43 @@ pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { +pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { unsafe { - let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: int8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: int8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + let mut ret_val: int8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73649,19 +70650,20 @@ pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { +pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73678,19 +70680,25 @@ pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { +pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { unsafe { - let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: int16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73707,19 +70715,20 @@ pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { +pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] #[cfg_attr( all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip1) @@ -73736,16 +70745,22 @@ pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { +pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { unsafe { - let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: int32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73765,16 +70780,25 @@ pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { +pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73794,24 +70818,40 @@ pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { +pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { unsafe { - let a0: int8x16_t = simd_shuffle!( + let a: uint8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint8x16_t = simd_shuffle!( a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] ); - let b0: int8x16_t = simd_shuffle!( + let b0: uint8x16_t = simd_shuffle!( a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] ); - transmute((a0, b0)) + let mut ret_val: uint8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73831,16 +70871,17 @@ pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { +pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73860,16 +70901,22 @@ pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { +pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { unsafe { - let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) + let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: uint16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73889,24 +70936,17 @@ pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { +pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let a0: uint8x16_t = simd_shuffle!( - a, - b, - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] - ); - let b0: uint8x16_t = simd_shuffle!( - a, - b, - [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] - ); + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); transmute((a0, b0)) } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73926,16 +70966,22 @@ pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { +pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { unsafe { - let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); - transmute((a0, b0)) + let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); + let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]); + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let mut ret_val: uint32x4x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]); + ret_val } } #[doc = "Zip vectors"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73955,16 +71001,25 @@ pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") )] -pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { +pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { - let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); transmute((a0, b0)) } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -73986,6 +71041,10 @@ pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { )] pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { unsafe { + let a: poly8x16_t = + simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly8x16_t = + simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]); let a0: poly8x16_t = simd_shuffle!( a, b, @@ -73996,12 +71055,24 @@ pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] ); - transmute((a0, b0)) + let mut ret_val: poly8x16x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!( + ret_val.0, + ret_val.0, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val.1 = simd_shuffle!( + ret_val.1, + ret_val.1, + [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + ); + ret_val } } #[doc = "Zip vectors"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] @@ -74028,3 +71099,38 @@ pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { transmute((a0, b0)) } } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip1) +)] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + unsafe { + let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]); + let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]); + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + let mut ret_val: poly16x8x2_t = transmute((a0, b0)); + ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]); + ret_val + } +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs index ed65de2b89460..4cc7f64f2dc29 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs @@ -7,7 +7,7 @@ mod generated; #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] pub use self::generated::*; -use crate::{core_arch::simd::*, hint::unreachable_unchecked, intrinsics::simd::*, mem::transmute}; +use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute}; #[cfg(test)] use stdarch_test::assert_instr; @@ -1663,235 +1663,6 @@ mod tests { assert_eq!(r, e) } - #[simd_test(enable = "neon")] - fn test_vget_lane_u8() { - let v = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = vget_lane_u8::<1>(v.into()); - assert_eq!(r, 2); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_u32() { - let v = u32x4::new(1, 2, 3, 4); - let r = vgetq_lane_u32::<1>(v.into()); - assert_eq!(r, 2); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_s32() { - let v = i32x4::new(1, 2, 3, 4); - let r = vgetq_lane_s32::<1>(v.into()); - assert_eq!(r, 2); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_u64() { - let v = u64x1::new(1); - let r = vget_lane_u64::<0>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_u16() { - let v = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = vgetq_lane_u16::<1>(v.into()); - assert_eq!(r, 2); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_s8() { - let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r = vget_lane_s8::<2>(v.into()); - assert_eq!(r, 2); - let r = vget_lane_s8::<4>(v.into()); - assert_eq!(r, 4); - let r = vget_lane_s8::<5>(v.into()); - assert_eq!(r, 5); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_p8() { - let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r = vget_lane_p8::<2>(v.into()); - assert_eq!(r, 2); - let r = vget_lane_p8::<3>(v.into()); - assert_eq!(r, 3); - let r = vget_lane_p8::<5>(v.into()); - assert_eq!(r, 5); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_p16() { - let v = u16x4::new(0, 1, 2, 3); - let r = vget_lane_p16::<2>(v.into()); - assert_eq!(r, 2); - let r = vget_lane_p16::<3>(v.into()); - assert_eq!(r, 3); - let r = vget_lane_p16::<0>(v.into()); - assert_eq!(r, 0); - let r = vget_lane_p16::<1>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_s16() { - let v = i16x4::new(0, 1, 2, 3); - let r = vget_lane_s16::<2>(v.into()); - assert_eq!(r, 2); - let r = vget_lane_s16::<3>(v.into()); - assert_eq!(r, 3); - let r = vget_lane_s16::<0>(v.into()); - assert_eq!(r, 0); - let r = vget_lane_s16::<1>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_u16() { - let v = u16x4::new(0, 1, 2, 3); - let r = vget_lane_u16::<2>(v.into()); - assert_eq!(r, 2); - let r = vget_lane_u16::<3>(v.into()); - assert_eq!(r, 3); - let r = vget_lane_u16::<0>(v.into()); - assert_eq!(r, 0); - let r = vget_lane_u16::<1>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_f32() { - let v = f32x2::new(0.0, 1.0); - let r = vget_lane_f32::<1>(v.into()); - assert_eq!(r, 1.0); - let r = vget_lane_f32::<0>(v.into()); - assert_eq!(r, 0.0); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_s32() { - let v = i32x2::new(0, 1); - let r = vget_lane_s32::<1>(v.into()); - assert_eq!(r, 1); - let r = vget_lane_s32::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_u32() { - let v = u32x2::new(0, 1); - let r = vget_lane_u32::<1>(v.into()); - assert_eq!(r, 1); - let r = vget_lane_u32::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_s64() { - let v = i64x1::new(1); - let r = vget_lane_s64::<0>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vget_lane_p64() { - let v = u64x1::new(1); - let r = vget_lane_p64::<0>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_s8() { - let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = vgetq_lane_s8::<7>(v.into()); - assert_eq!(r, 7); - let r = vgetq_lane_s8::<13>(v.into()); - assert_eq!(r, 13); - let r = vgetq_lane_s8::<3>(v.into()); - assert_eq!(r, 3); - let r = vgetq_lane_s8::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_p8() { - let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = vgetq_lane_p8::<7>(v.into()); - assert_eq!(r, 7); - let r = vgetq_lane_p8::<13>(v.into()); - assert_eq!(r, 13); - let r = vgetq_lane_p8::<3>(v.into()); - assert_eq!(r, 3); - let r = vgetq_lane_p8::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_u8() { - let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r = vgetq_lane_u8::<7>(v.into()); - assert_eq!(r, 7); - let r = vgetq_lane_u8::<13>(v.into()); - assert_eq!(r, 13); - let r = vgetq_lane_u8::<3>(v.into()); - assert_eq!(r, 3); - let r = vgetq_lane_u8::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_s16() { - let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r = vgetq_lane_s16::<3>(v.into()); - assert_eq!(r, 3); - let r = vgetq_lane_s16::<6>(v.into()); - assert_eq!(r, 6); - let r = vgetq_lane_s16::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_p16() { - let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r = vgetq_lane_p16::<3>(v.into()); - assert_eq!(r, 3); - let r = vgetq_lane_p16::<7>(v.into()); - assert_eq!(r, 7); - let r = vgetq_lane_p16::<1>(v.into()); - assert_eq!(r, 1); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_f32() { - let v = f32x4::new(0.0, 1.0, 2.0, 3.0); - let r = vgetq_lane_f32::<3>(v.into()); - assert_eq!(r, 3.0); - let r = vgetq_lane_f32::<0>(v.into()); - assert_eq!(r, 0.0); - let r = vgetq_lane_f32::<2>(v.into()); - assert_eq!(r, 2.0); - let r = vgetq_lane_f32::<1>(v.into()); - assert_eq!(r, 1.0); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_s64() { - let v = i64x2::new(0, 1); - let r = vgetq_lane_s64::<1>(v.into()); - assert_eq!(r, 1); - let r = vgetq_lane_s64::<0>(v.into()); - assert_eq!(r, 0); - } - - #[simd_test(enable = "neon")] - fn test_vgetq_lane_p64() { - let v = u64x2::new(0, 1); - let r = vgetq_lane_p64::<1>(v.into()); - assert_eq!(r, 1); - let r = vgetq_lane_p64::<0>(v.into()); - assert_eq!(r, 0); - } - #[simd_test(enable = "neon")] fn test_vext_s64() { let a: i64x1 = i64x1::new(0); @@ -1910,182 +1681,6 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - fn test_vget_high_s8() { - let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16); - let r = i8x8::from(vget_high_s8(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_s16() { - let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = i16x4::new(5, 6, 7, 8); - let r = i16x4::from(vget_high_s16(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_s32() { - let a = i32x4::new(1, 2, 3, 4); - let e = i32x2::new(3, 4); - let r = i32x2::from(vget_high_s32(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_s64() { - let a = i64x2::new(1, 2); - let e = i64x1::new(2); - let r = i64x1::from(vget_high_s64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16); - let r = u8x8::from(vget_high_u8(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_u16() { - let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = u16x4::new(5, 6, 7, 8); - let r = u16x4::from(vget_high_u16(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_u32() { - let a = u32x4::new(1, 2, 3, 4); - let e = u32x2::new(3, 4); - let r = u32x2::from(vget_high_u32(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_u64() { - let a = u64x2::new(1, 2); - let e = u64x1::new(2); - let r = u64x1::from(vget_high_u64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_p8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16); - let r = u8x8::from(vget_high_p8(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_p16() { - let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = u16x4::new(5, 6, 7, 8); - let r = u16x4::from(vget_high_p16(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_high_f32() { - let a = f32x4::new(1.0, 2.0, 3.0, 4.0); - let e = f32x2::new(3.0, 4.0); - let r = f32x2::from(vget_high_f32(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_s8() { - let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = i8x8::from(vget_low_s8(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_s16() { - let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = i16x4::new(1, 2, 3, 4); - let r = i16x4::from(vget_low_s16(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_s32() { - let a = i32x4::new(1, 2, 3, 4); - let e = i32x2::new(1, 2); - let r = i32x2::from(vget_low_s32(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_s64() { - let a = i64x2::new(1, 2); - let e = i64x1::new(1); - let r = i64x1::from(vget_low_s64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = u8x8::from(vget_low_u8(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_u16() { - let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = u16x4::new(1, 2, 3, 4); - let r = u16x4::from(vget_low_u16(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_u32() { - let a = u32x4::new(1, 2, 3, 4); - let e = u32x2::new(1, 2); - let r = u32x2::from(vget_low_u32(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_u64() { - let a = u64x2::new(1, 2); - let e = u64x1::new(1); - let r = u64x1::from(vget_low_u64(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_p8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r = u8x8::from(vget_low_p8(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_p16() { - let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = u16x4::new(1, 2, 3, 4); - let r = u16x4::from(vget_low_p16(a.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vget_low_f32() { - let a = f32x4::new(1.0, 2.0, 3.0, 4.0); - let e = f32x2::new(1.0, 2.0); - let r = f32x2::from(vget_low_f32(a.into())); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] fn test_vdupq_n_s8() { let v: i8 = 42; @@ -2469,13 +2064,6 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - fn test_vgetq_lane_u64() { - let v = u64x2::new(1, 2); - let r = vgetq_lane_u64::<1>(v.into()); - assert_eq!(r, 2); - } - #[simd_test(enable = "neon")] fn test_vadd_s8() { test_ari_s8( @@ -2638,72 +2226,6 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - fn test_vaddl_high_s8() { - let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let x = i8::MAX; - let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); - let x = x as i16; - let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15); - let r = i16x8::from(vaddl_high_s8(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddl_high_s16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let x = i16::MAX; - let b = i16x8::new(x, x, x, x, x, x, x, x); - let x = x as i32; - let e = i32x4::new(x + 4, x + 5, x + 6, x + 7); - let r = i32x4::from(vaddl_high_s16(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddl_high_s32() { - let a = i32x4::new(0, 1, 2, 3); - let x = i32::MAX; - let b = i32x4::new(x, x, x, x); - let x = x as i64; - let e = i64x2::new(x + 2, x + 3); - let r = i64x2::from(vaddl_high_s32(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddl_high_u8() { - let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let x = u8::MAX; - let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); - let x = x as u16; - let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15); - let r = u16x8::from(vaddl_high_u8(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddl_high_u16() { - let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let x = u16::MAX; - let b = u16x8::new(x, x, x, x, x, x, x, x); - let x = x as u32; - let e = u32x4::new(x + 4, x + 5, x + 6, x + 7); - let r = u32x4::from(vaddl_high_u16(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddl_high_u32() { - let a = u32x4::new(0, 1, 2, 3); - let x = u32::MAX; - let b = u32x4::new(x, x, x, x); - let x = x as u64; - let e = u64x2::new(x + 2, x + 3); - let r = u64x2::from(vaddl_high_u32(a.into(), b.into())); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] fn test_vaddw_s8() { let x = i16::MAX; @@ -2794,96 +2316,6 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - fn test_vaddw_high_s8() { - let x = i16::MAX; - let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7); - let y = i8::MAX; - let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y); - let y = y as i16; - let e = i16x8::new( - x.wrapping_add(y), - 1 + y, - 2 + y, - 3 + y, - 4 + y, - 5 + y, - 6 + y, - 7 + y, - ); - let r = i16x8::from(vaddw_high_s8(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddw_high_s16() { - let x = i32::MAX; - let a = i32x4::new(x, 1, 2, 3); - let y = i16::MAX; - let b = i16x8::new(0, 0, 0, 0, y, y, y, y); - let y = y as i32; - let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); - let r = i32x4::from(vaddw_high_s16(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddw_high_s32() { - let x = i64::MAX; - let a = i64x2::new(x, 1); - let y = i32::MAX; - let b = i32x4::new(0, 0, y, y); - let y = y as i64; - let e = i64x2::new(x.wrapping_add(y), 1 + y); - let r = i64x2::from(vaddw_high_s32(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddw_high_u8() { - let x = u16::MAX; - let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7); - let y = u8::MAX; - let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y); - let y = y as u16; - let e = u16x8::new( - x.wrapping_add(y), - 1 + y, - 2 + y, - 3 + y, - 4 + y, - 5 + y, - 6 + y, - 7 + y, - ); - let r = u16x8::from(vaddw_high_u8(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddw_high_u16() { - let x = u32::MAX; - let a = u32x4::new(x, 1, 2, 3); - let y = u16::MAX; - let b = u16x8::new(0, 0, 0, 0, y, y, y, y); - let y = y as u32; - let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); - let r = u32x4::from(vaddw_high_u16(a.into(), b.into())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - fn test_vaddw_high_u32() { - let x = u64::MAX; - let a = u64x2::new(x, 1); - let y = u32::MAX; - let b = u32x4::new(0, 0, y, y); - let y = y as u64; - let e = u64x2::new(x.wrapping_add(y), 1 + y); - let r = u64x2::from(vaddw_high_u32(a.into(), b.into())); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] fn test_vmvn_s8() { let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); @@ -5766,42 +5198,9 @@ mod tests { assert_eq!(r, e); } - macro_rules! test_vcombine { - ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => { - #[allow(unused_assignments)] - #[simd_test(enable = "neon")] - fn $test_id() { - let a = Simd::from_array([$($a),*]); - let b = Simd::from_array([$($b),*]); - let e = Simd::from_array([$($a),* $(, $b)*]); - let c = $fn_id(a.into(), b.into()); - let mut d = e; - d = c.into(); - assert_eq!(d, e); - } - } - } - - test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110])); - test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110])); - test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110])); - - test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16])); - test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16])); - test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16])); - #[cfg(not(target_arch = "arm64ec"))] mod fp16 { use super::*; - #[simd_test(enable = "neon,fp16")] - fn test_vcombine_f16() { - let a = f16x4::from_array([3_f16, 4., 5., 6.]); - let b = f16x4::from_array([13_f16, 14., 15., 16.]); - let e = f16x8::from_array([3_f16, 4., 5., 6., 13_f16, 14., 15., 16.]); - let c = f16x8::from(vcombine_f16(a.into(), b.into())); - assert_eq!(c, e); - } - #[simd_test(enable = "neon,fp16")] fn test_vld1_lane_f16() { let a = f16x4::new(0., 1., 2., 3.); @@ -5837,17 +5236,6 @@ mod tests { } } - test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14])); - test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14])); - // note: poly32x4 does not exist, and neither does vcombine_p32 - test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.])); - - test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64])); - test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64])); - test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64])); - #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] - test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64])); - macro_rules! lane_wide_store_load_roundtrip { ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => { let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty); @@ -5860,11 +5248,10 @@ mod tests { }; } - // Most of these are implemented with builtins, which miri can't handle macro_rules! lane_wide_store_load_roundtrip_neon { ($( $name:ident $args:tt);* $(;)?) => { $( - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics #[simd_test(enable = "neon")] unsafe fn $name() { lane_wide_store_load_roundtrip! $args; @@ -5876,7 +5263,7 @@ mod tests { macro_rules! lane_wide_store_load_roundtrip_fp16 { ($( $name:ident $args:tt);* $(;)?) => { $( - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics #[simd_test(enable = "neon,fp16")] #[cfg(not(target_arch = "arm64ec"))] unsafe fn $name() { diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs index c7dbd46480a42..e05e19457319d 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -43,14 +43,6 @@ unsafe extern "unadjusted" { fn __lasx_xvsrlri_w(a: __v8i32, b: u32) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvsrlri.d"] fn __lasx_xvsrlri_d(a: __v4i64, b: u32) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvbitclr.b"] - fn __lasx_xvbitclr_b(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvbitclr.h"] - fn __lasx_xvbitclr_h(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvbitclr.w"] - fn __lasx_xvbitclr_w(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvbitclr.d"] - fn __lasx_xvbitclr_d(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvbitclri.b"] fn __lasx_xvbitclri_b(a: __v32u8, b: u32) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitclri.h"] @@ -59,14 +51,6 @@ unsafe extern "unadjusted" { fn __lasx_xvbitclri_w(a: __v8u32, b: u32) -> __v8u32; #[link_name = "llvm.loongarch.lasx.xvbitclri.d"] fn __lasx_xvbitclri_d(a: __v4u64, b: u32) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvbitset.b"] - fn __lasx_xvbitset_b(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvbitset.h"] - fn __lasx_xvbitset_h(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvbitset.w"] - fn __lasx_xvbitset_w(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvbitset.d"] - fn __lasx_xvbitset_d(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvbitseti.b"] fn __lasx_xvbitseti_b(a: __v32u8, b: u32) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitseti.h"] @@ -75,14 +59,6 @@ unsafe extern "unadjusted" { fn __lasx_xvbitseti_w(a: __v8u32, b: u32) -> __v8u32; #[link_name = "llvm.loongarch.lasx.xvbitseti.d"] fn __lasx_xvbitseti_d(a: __v4u64, b: u32) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvbitrev.b"] - fn __lasx_xvbitrev_b(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvbitrev.h"] - fn __lasx_xvbitrev_h(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvbitrev.w"] - fn __lasx_xvbitrev_w(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvbitrev.d"] - fn __lasx_xvbitrev_d(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvbitrevi.b"] fn __lasx_xvbitrevi_b(a: __v32u8, b: u32) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvbitrevi.h"] @@ -115,30 +91,6 @@ unsafe extern "unadjusted" { fn __lasx_xvsat_wu(a: __v8u32, b: u32) -> __v8u32; #[link_name = "llvm.loongarch.lasx.xvsat.du"] fn __lasx_xvsat_du(a: __v4u64, b: u32) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvadda.b"] - fn __lasx_xvadda_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvadda.h"] - fn __lasx_xvadda_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvadda.w"] - fn __lasx_xvadda_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvadda.d"] - fn __lasx_xvadda_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvsadd.b"] - fn __lasx_xvsadd_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvsadd.h"] - fn __lasx_xvsadd_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvsadd.w"] - fn __lasx_xvsadd_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvsadd.d"] - fn __lasx_xvsadd_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvsadd.bu"] - fn __lasx_xvsadd_bu(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvsadd.hu"] - fn __lasx_xvsadd_hu(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvsadd.wu"] - fn __lasx_xvsadd_wu(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvsadd.du"] - fn __lasx_xvsadd_du(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvavg.b"] fn __lasx_xvavg_b(a: __v32i8, b: __v32i8) -> __v32i8; #[link_name = "llvm.loongarch.lasx.xvavg.h"] @@ -171,38 +123,6 @@ unsafe extern "unadjusted" { fn __lasx_xvavgr_wu(a: __v8u32, b: __v8u32) -> __v8u32; #[link_name = "llvm.loongarch.lasx.xvavgr.du"] fn __lasx_xvavgr_du(a: __v4u64, b: __v4u64) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvssub.b"] - fn __lasx_xvssub_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvssub.h"] - fn __lasx_xvssub_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvssub.w"] - fn __lasx_xvssub_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvssub.d"] - fn __lasx_xvssub_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvssub.bu"] - fn __lasx_xvssub_bu(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvssub.hu"] - fn __lasx_xvssub_hu(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvssub.wu"] - fn __lasx_xvssub_wu(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvssub.du"] - fn __lasx_xvssub_du(a: __v4u64, b: __v4u64) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvabsd.b"] - fn __lasx_xvabsd_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvabsd.h"] - fn __lasx_xvabsd_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvabsd.w"] - fn __lasx_xvabsd_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvabsd.d"] - fn __lasx_xvabsd_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvabsd.bu"] - fn __lasx_xvabsd_bu(a: __v32u8, b: __v32u8) -> __v32u8; - #[link_name = "llvm.loongarch.lasx.xvabsd.hu"] - fn __lasx_xvabsd_hu(a: __v16u16, b: __v16u16) -> __v16u16; - #[link_name = "llvm.loongarch.lasx.xvabsd.wu"] - fn __lasx_xvabsd_wu(a: __v8u32, b: __v8u32) -> __v8u32; - #[link_name = "llvm.loongarch.lasx.xvabsd.du"] - fn __lasx_xvabsd_du(a: __v4u64, b: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvhaddw.h.b"] fn __lasx_xvhaddw_h_b(a: __v32i8, b: __v32i8) -> __v16i16; #[link_name = "llvm.loongarch.lasx.xvhaddw.w.h"] @@ -235,22 +155,6 @@ unsafe extern "unadjusted" { fn __lasx_xvrepl128vei_w(a: __v8i32, b: u32) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvrepl128vei.d"] fn __lasx_xvrepl128vei_d(a: __v4i64, b: u32) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvpickev.b"] - fn __lasx_xvpickev_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvpickev.h"] - fn __lasx_xvpickev_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvpickev.w"] - fn __lasx_xvpickev_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvpickev.d"] - fn __lasx_xvpickev_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvpickod.b"] - fn __lasx_xvpickod_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvpickod.h"] - fn __lasx_xvpickod_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvpickod.w"] - fn __lasx_xvpickod_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvpickod.d"] - fn __lasx_xvpickod_d(a: __v4i64, b: __v4i64) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvilvh.b"] fn __lasx_xvilvh_b(a: __v32i8, b: __v32i8) -> __v32i8; #[link_name = "llvm.loongarch.lasx.xvilvh.h"] @@ -1285,34 +1189,6 @@ pub fn lasx_xvsrlri_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvsrlri_d(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitclr_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitclr_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] @@ -1349,34 +1225,6 @@ pub fn lasx_xvbitclri_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvbitclri_d(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitset_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitset_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] @@ -1413,34 +1261,6 @@ pub fn lasx_xvbitseti_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvbitseti_d(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvbitrev_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvbitrev_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(1)] @@ -1585,90 +1405,6 @@ pub fn lasx_xvsat_du(a: m256i) -> m256i { unsafe { transmute(__lasx_xvsat_du(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvadda_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvadda_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvadda_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvadda_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvadda_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvadda_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvadda_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvadda_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_bu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_hu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_wu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvsadd_du(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvsadd_du(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -1781,118 +1517,6 @@ pub fn lasx_xvavgr_du(a: m256i, b: m256i) -> m256i { unsafe { transmute(__lasx_xvavgr_du(transmute(a), transmute(b))) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_bu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_hu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_wu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvssub_du(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvssub_du(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_bu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_hu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_wu(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvabsd_du(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvabsd_du(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -2013,62 +1637,6 @@ pub fn lasx_xvrepl128vei_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvrepl128vei_d(transmute(a), IMM1)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickev_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickev_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickev_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickev_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickev_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickev_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickev_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickev_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickod_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickod_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickod_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickod_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickod_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickod_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvpickod_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvpickod_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs index f2a7254d7947b..1d44f418bfbcd 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -5,6 +5,68 @@ use crate::core_arch::simd::{self as cs, *}; use crate::intrinsics::simd as is; use crate::mem::transmute; +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_b(a: T, b: T) -> T { + simd_shuffle!( + b, + a, + [ + 0, 2, 4, 6, 8, 10, 12, 14, 32, 34, 36, 38, 40, 42, 44, 46, + 16, 18, 20, 22, 24, 26, 28, 30, 48, 50, 52, 54, 56, 58, 60, 62 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 4, 2, 6]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2, 8, 10, 4, 6, 12, 14]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_b(a: T, b: T) -> T { + simd_shuffle!( + b, + a, + [ + 1, 3, 5, 7, 9, 11, 13, 15, 33, 35, 37, 39, 41, 43, 45, 47, + 17, 19, 21, 23, 25, 27, 29, 31, 49, 51, 53, 55, 57, 59, 61, 63 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 5, 3, 7]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3, 9, 11, 5, 7, 13, 15]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]) +} + impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32); impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16); impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8); @@ -120,6 +182,54 @@ impl_vvv!("lasx", lasx_xvsrl_b, ls::simd_shr, m256i, u8x32); impl_vvv!("lasx", lasx_xvsrl_h, ls::simd_shr, m256i, u16x16); impl_vvv!("lasx", lasx_xvsrl_w, ls::simd_shr, m256i, u32x8); impl_vvv!("lasx", lasx_xvsrl_d, ls::simd_shr, m256i, u64x4); +impl_vvv!("lasx", lasx_xvbitclr_b, ls::simd_bitclr, m256i, u8x32); +impl_vvv!("lasx", lasx_xvbitclr_h, ls::simd_bitclr, m256i, u16x16); +impl_vvv!("lasx", lasx_xvbitclr_w, ls::simd_bitclr, m256i, u32x8); +impl_vvv!("lasx", lasx_xvbitclr_d, ls::simd_bitclr, m256i, u64x4); +impl_vvv!("lasx", lasx_xvbitset_b, ls::simd_bitset, m256i, u8x32); +impl_vvv!("lasx", lasx_xvbitset_h, ls::simd_bitset, m256i, u16x16); +impl_vvv!("lasx", lasx_xvbitset_w, ls::simd_bitset, m256i, u32x8); +impl_vvv!("lasx", lasx_xvbitset_d, ls::simd_bitset, m256i, u64x4); +impl_vvv!("lasx", lasx_xvbitrev_b, ls::simd_bitrev, m256i, u8x32); +impl_vvv!("lasx", lasx_xvbitrev_h, ls::simd_bitrev, m256i, u16x16); +impl_vvv!("lasx", lasx_xvbitrev_w, ls::simd_bitrev, m256i, u32x8); +impl_vvv!("lasx", lasx_xvbitrev_d, ls::simd_bitrev, m256i, u64x4); +impl_vvv!("lasx", lasx_xvsadd_b, is::simd_saturating_add, m256i, i8x32); +impl_vvv!("lasx", lasx_xvsadd_h, is::simd_saturating_add, m256i, i16x16); +impl_vvv!("lasx", lasx_xvsadd_w, is::simd_saturating_add, m256i, i32x8); +impl_vvv!("lasx", lasx_xvsadd_d, is::simd_saturating_add, m256i, i64x4); +impl_vvv!("lasx", lasx_xvsadd_bu, is::simd_saturating_add, m256i, u8x32); +impl_vvv!("lasx", lasx_xvsadd_hu, is::simd_saturating_add, m256i, u16x16); +impl_vvv!("lasx", lasx_xvsadd_wu, is::simd_saturating_add, m256i, u32x8); +impl_vvv!("lasx", lasx_xvsadd_du, is::simd_saturating_add, m256i, u64x4); +impl_vvv!("lasx", lasx_xvssub_b, is::simd_saturating_sub, m256i, i8x32); +impl_vvv!("lasx", lasx_xvssub_h, is::simd_saturating_sub, m256i, i16x16); +impl_vvv!("lasx", lasx_xvssub_w, is::simd_saturating_sub, m256i, i32x8); +impl_vvv!("lasx", lasx_xvssub_d, is::simd_saturating_sub, m256i, i64x4); +impl_vvv!("lasx", lasx_xvssub_bu, is::simd_saturating_sub, m256i, u8x32); +impl_vvv!("lasx", lasx_xvssub_hu, is::simd_saturating_sub, m256i, u16x16); +impl_vvv!("lasx", lasx_xvssub_wu, is::simd_saturating_sub, m256i, u32x8); +impl_vvv!("lasx", lasx_xvssub_du, is::simd_saturating_sub, m256i, u64x4); +impl_vvv!("lasx", lasx_xvadda_b, ls::simd_adda, m256i, i8x32); +impl_vvv!("lasx", lasx_xvadda_h, ls::simd_adda, m256i, i16x16); +impl_vvv!("lasx", lasx_xvadda_w, ls::simd_adda, m256i, i32x8); +impl_vvv!("lasx", lasx_xvadda_d, ls::simd_adda, m256i, i64x4); +impl_vvv!("lasx", lasx_xvabsd_b, ls::simd_absd, m256i, i8x32); +impl_vvv!("lasx", lasx_xvabsd_h, ls::simd_absd, m256i, i16x16); +impl_vvv!("lasx", lasx_xvabsd_w, ls::simd_absd, m256i, i32x8); +impl_vvv!("lasx", lasx_xvabsd_d, ls::simd_absd, m256i, i64x4); +impl_vvv!("lasx", lasx_xvabsd_bu, ls::simd_absd, m256i, u8x32); +impl_vvv!("lasx", lasx_xvabsd_hu, ls::simd_absd, m256i, u16x16); +impl_vvv!("lasx", lasx_xvabsd_wu, ls::simd_absd, m256i, u32x8); +impl_vvv!("lasx", lasx_xvabsd_du, ls::simd_absd, m256i, u64x4); +impl_vvv!("lasx", lasx_xvpickev_b, simd_pickev_b, m256i, i8x32); +impl_vvv!("lasx", lasx_xvpickev_h, simd_pickev_h, m256i, i16x16); +impl_vvv!("lasx", lasx_xvpickev_w, simd_pickev_w, m256i, i32x8); +impl_vvv!("lasx", lasx_xvpickev_d, simd_pickev_d, m256i, i64x4); +impl_vvv!("lasx", lasx_xvpickod_b, simd_pickod_b, m256i, i8x32); +impl_vvv!("lasx", lasx_xvpickod_h, simd_pickod_h, m256i, i16x16); +impl_vvv!("lasx", lasx_xvpickod_w, simd_pickod_w, m256i, i32x8); +impl_vvv!("lasx", lasx_xvpickod_d, simd_pickod_d, m256i, i64x4); impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32); impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16); diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs index 4ec3cdf0c5abb..767be195292f2 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -43,14 +43,6 @@ unsafe extern "unadjusted" { fn __lsx_vsrlri_w(a: __v4i32, b: u32) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vsrlri.d"] fn __lsx_vsrlri_d(a: __v2i64, b: u32) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vbitclr.b"] - fn __lsx_vbitclr_b(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vbitclr.h"] - fn __lsx_vbitclr_h(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vbitclr.w"] - fn __lsx_vbitclr_w(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vbitclr.d"] - fn __lsx_vbitclr_d(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vbitclri.b"] fn __lsx_vbitclri_b(a: __v16u8, b: u32) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitclri.h"] @@ -59,14 +51,6 @@ unsafe extern "unadjusted" { fn __lsx_vbitclri_w(a: __v4u32, b: u32) -> __v4u32; #[link_name = "llvm.loongarch.lsx.vbitclri.d"] fn __lsx_vbitclri_d(a: __v2u64, b: u32) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vbitset.b"] - fn __lsx_vbitset_b(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vbitset.h"] - fn __lsx_vbitset_h(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vbitset.w"] - fn __lsx_vbitset_w(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vbitset.d"] - fn __lsx_vbitset_d(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vbitseti.b"] fn __lsx_vbitseti_b(a: __v16u8, b: u32) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitseti.h"] @@ -75,14 +59,6 @@ unsafe extern "unadjusted" { fn __lsx_vbitseti_w(a: __v4u32, b: u32) -> __v4u32; #[link_name = "llvm.loongarch.lsx.vbitseti.d"] fn __lsx_vbitseti_d(a: __v2u64, b: u32) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vbitrev.b"] - fn __lsx_vbitrev_b(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vbitrev.h"] - fn __lsx_vbitrev_h(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vbitrev.w"] - fn __lsx_vbitrev_w(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vbitrev.d"] - fn __lsx_vbitrev_d(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vbitrevi.b"] fn __lsx_vbitrevi_b(a: __v16u8, b: u32) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vbitrevi.h"] @@ -115,30 +91,6 @@ unsafe extern "unadjusted" { fn __lsx_vsat_wu(a: __v4u32, b: u32) -> __v4u32; #[link_name = "llvm.loongarch.lsx.vsat.du"] fn __lsx_vsat_du(a: __v2u64, b: u32) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vadda.b"] - fn __lsx_vadda_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vadda.h"] - fn __lsx_vadda_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vadda.w"] - fn __lsx_vadda_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vadda.d"] - fn __lsx_vadda_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vsadd.b"] - fn __lsx_vsadd_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vsadd.h"] - fn __lsx_vsadd_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vsadd.w"] - fn __lsx_vsadd_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vsadd.d"] - fn __lsx_vsadd_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vsadd.bu"] - fn __lsx_vsadd_bu(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vsadd.hu"] - fn __lsx_vsadd_hu(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vsadd.wu"] - fn __lsx_vsadd_wu(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vsadd.du"] - fn __lsx_vsadd_du(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vavg.b"] fn __lsx_vavg_b(a: __v16i8, b: __v16i8) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vavg.h"] @@ -171,38 +123,6 @@ unsafe extern "unadjusted" { fn __lsx_vavgr_wu(a: __v4u32, b: __v4u32) -> __v4u32; #[link_name = "llvm.loongarch.lsx.vavgr.du"] fn __lsx_vavgr_du(a: __v2u64, b: __v2u64) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vssub.b"] - fn __lsx_vssub_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vssub.h"] - fn __lsx_vssub_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vssub.w"] - fn __lsx_vssub_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vssub.d"] - fn __lsx_vssub_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vssub.bu"] - fn __lsx_vssub_bu(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vssub.hu"] - fn __lsx_vssub_hu(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vssub.wu"] - fn __lsx_vssub_wu(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vssub.du"] - fn __lsx_vssub_du(a: __v2u64, b: __v2u64) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vabsd.b"] - fn __lsx_vabsd_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vabsd.h"] - fn __lsx_vabsd_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vabsd.w"] - fn __lsx_vabsd_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vabsd.d"] - fn __lsx_vabsd_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vabsd.bu"] - fn __lsx_vabsd_bu(a: __v16u8, b: __v16u8) -> __v16u8; - #[link_name = "llvm.loongarch.lsx.vabsd.hu"] - fn __lsx_vabsd_hu(a: __v8u16, b: __v8u16) -> __v8u16; - #[link_name = "llvm.loongarch.lsx.vabsd.wu"] - fn __lsx_vabsd_wu(a: __v4u32, b: __v4u32) -> __v4u32; - #[link_name = "llvm.loongarch.lsx.vabsd.du"] - fn __lsx_vabsd_du(a: __v2u64, b: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vhaddw.h.b"] fn __lsx_vhaddw_h_b(a: __v16i8, b: __v16i8) -> __v8i16; #[link_name = "llvm.loongarch.lsx.vhaddw.w.h"] @@ -243,22 +163,6 @@ unsafe extern "unadjusted" { fn __lsx_vreplvei_w(a: __v4i32, b: u32) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vreplvei.d"] fn __lsx_vreplvei_d(a: __v2i64, b: u32) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vpickev.b"] - fn __lsx_vpickev_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vpickev.h"] - fn __lsx_vpickev_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vpickev.w"] - fn __lsx_vpickev_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vpickev.d"] - fn __lsx_vpickev_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vpickod.b"] - fn __lsx_vpickod_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vpickod.h"] - fn __lsx_vpickod_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vpickod.w"] - fn __lsx_vpickod_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vpickod.d"] - fn __lsx_vpickod_d(a: __v2i64, b: __v2i64) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vilvh.b"] fn __lsx_vilvh_b(a: __v16i8, b: __v16i8) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vilvh.h"] @@ -1197,34 +1101,6 @@ pub fn lsx_vsrlri_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vsrlri_d(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitclr_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitclr_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] @@ -1261,34 +1137,6 @@ pub fn lsx_vbitclri_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vbitclri_d(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitset_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitset_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] @@ -1325,34 +1173,6 @@ pub fn lsx_vbitseti_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vbitseti_d(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vbitrev_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vbitrev_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[rustc_legacy_const_generics(1)] @@ -1497,90 +1317,6 @@ pub fn lsx_vsat_du(a: m128i) -> m128i { unsafe { transmute(__lsx_vsat_du(transmute(a), IMM6)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vadda_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vadda_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vadda_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vadda_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vadda_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vadda_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vadda_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vadda_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_bu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_hu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_wu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vsadd_du(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vsadd_du(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -1693,118 +1429,6 @@ pub fn lsx_vavgr_du(a: m128i, b: m128i) -> m128i { unsafe { transmute(__lsx_vavgr_du(transmute(a), transmute(b))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_bu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_hu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_wu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vssub_du(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vssub_du(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_bu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_bu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_hu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_hu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_wu(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_wu(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vabsd_du(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vabsd_du(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -1953,62 +1577,6 @@ pub fn lsx_vreplvei_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vreplvei_d(transmute(a), IMM1)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickev_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickev_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickev_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickev_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickev_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickev_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickev_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickev_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickod_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickod_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickod_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickod_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickod_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickod_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vpickod_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vpickod_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs index 1980000c3d4dd..24f9af851d8c1 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -5,6 +5,54 @@ use crate::core_arch::simd::{self as cs, *}; use crate::intrinsics::simd as is; use crate::mem::transmute; +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_b(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2, 4, 6, 8, 10, 12, 14]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2, 4, 6]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickev_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_b(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3, 5, 7, 9, 11, 13, 15]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3, 5, 7]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +const unsafe fn simd_pickod_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3]) +} + impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16); impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8); impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4); @@ -120,6 +168,54 @@ impl_vvv!("lsx", lsx_vsrl_b, ls::simd_shr, m128i, u8x16); impl_vvv!("lsx", lsx_vsrl_h, ls::simd_shr, m128i, u16x8); impl_vvv!("lsx", lsx_vsrl_w, ls::simd_shr, m128i, u32x4); impl_vvv!("lsx", lsx_vsrl_d, ls::simd_shr, m128i, u64x2); +impl_vvv!("lsx", lsx_vbitclr_b, ls::simd_bitclr, m128i, u8x16); +impl_vvv!("lsx", lsx_vbitclr_h, ls::simd_bitclr, m128i, u16x8); +impl_vvv!("lsx", lsx_vbitclr_w, ls::simd_bitclr, m128i, u32x4); +impl_vvv!("lsx", lsx_vbitclr_d, ls::simd_bitclr, m128i, u64x2); +impl_vvv!("lsx", lsx_vbitset_b, ls::simd_bitset, m128i, u8x16); +impl_vvv!("lsx", lsx_vbitset_h, ls::simd_bitset, m128i, u16x8); +impl_vvv!("lsx", lsx_vbitset_w, ls::simd_bitset, m128i, u32x4); +impl_vvv!("lsx", lsx_vbitset_d, ls::simd_bitset, m128i, u64x2); +impl_vvv!("lsx", lsx_vbitrev_b, ls::simd_bitrev, m128i, u8x16); +impl_vvv!("lsx", lsx_vbitrev_h, ls::simd_bitrev, m128i, u16x8); +impl_vvv!("lsx", lsx_vbitrev_w, ls::simd_bitrev, m128i, u32x4); +impl_vvv!("lsx", lsx_vbitrev_d, ls::simd_bitrev, m128i, u64x2); +impl_vvv!("lsx", lsx_vsadd_b, is::simd_saturating_add, m128i, i8x16); +impl_vvv!("lsx", lsx_vsadd_h, is::simd_saturating_add, m128i, i16x8); +impl_vvv!("lsx", lsx_vsadd_w, is::simd_saturating_add, m128i, i32x4); +impl_vvv!("lsx", lsx_vsadd_d, is::simd_saturating_add, m128i, i64x2); +impl_vvv!("lsx", lsx_vsadd_bu, is::simd_saturating_add, m128i, u8x16); +impl_vvv!("lsx", lsx_vsadd_hu, is::simd_saturating_add, m128i, u16x8); +impl_vvv!("lsx", lsx_vsadd_wu, is::simd_saturating_add, m128i, u32x4); +impl_vvv!("lsx", lsx_vsadd_du, is::simd_saturating_add, m128i, u64x2); +impl_vvv!("lsx", lsx_vssub_b, is::simd_saturating_sub, m128i, i8x16); +impl_vvv!("lsx", lsx_vssub_h, is::simd_saturating_sub, m128i, i16x8); +impl_vvv!("lsx", lsx_vssub_w, is::simd_saturating_sub, m128i, i32x4); +impl_vvv!("lsx", lsx_vssub_d, is::simd_saturating_sub, m128i, i64x2); +impl_vvv!("lsx", lsx_vssub_bu, is::simd_saturating_sub, m128i, u8x16); +impl_vvv!("lsx", lsx_vssub_hu, is::simd_saturating_sub, m128i, u16x8); +impl_vvv!("lsx", lsx_vssub_wu, is::simd_saturating_sub, m128i, u32x4); +impl_vvv!("lsx", lsx_vssub_du, is::simd_saturating_sub, m128i, u64x2); +impl_vvv!("lsx", lsx_vadda_b, ls::simd_adda, m128i, i8x16); +impl_vvv!("lsx", lsx_vadda_h, ls::simd_adda, m128i, i16x8); +impl_vvv!("lsx", lsx_vadda_w, ls::simd_adda, m128i, i32x4); +impl_vvv!("lsx", lsx_vadda_d, ls::simd_adda, m128i, i64x2); +impl_vvv!("lsx", lsx_vabsd_b, ls::simd_absd, m128i, i8x16); +impl_vvv!("lsx", lsx_vabsd_h, ls::simd_absd, m128i, i16x8); +impl_vvv!("lsx", lsx_vabsd_w, ls::simd_absd, m128i, i32x4); +impl_vvv!("lsx", lsx_vabsd_d, ls::simd_absd, m128i, i64x2); +impl_vvv!("lsx", lsx_vabsd_bu, ls::simd_absd, m128i, u8x16); +impl_vvv!("lsx", lsx_vabsd_hu, ls::simd_absd, m128i, u16x8); +impl_vvv!("lsx", lsx_vabsd_wu, ls::simd_absd, m128i, u32x4); +impl_vvv!("lsx", lsx_vabsd_du, ls::simd_absd, m128i, u64x2); +impl_vvv!("lsx", lsx_vpickev_b, simd_pickev_b, m128i, i8x16); +impl_vvv!("lsx", lsx_vpickev_h, simd_pickev_h, m128i, i16x8); +impl_vvv!("lsx", lsx_vpickev_w, simd_pickev_w, m128i, i32x4); +impl_vvv!("lsx", lsx_vpickev_d, simd_pickev_d, m128i, i64x2); +impl_vvv!("lsx", lsx_vpickod_b, simd_pickod_b, m128i, i8x16); +impl_vvv!("lsx", lsx_vpickod_h, simd_pickod_h, m128i, i16x8); +impl_vvv!("lsx", lsx_vpickod_w, simd_pickod_w, m128i, i32x4); +impl_vvv!("lsx", lsx_vpickod_d, simd_pickod_d, m128i, i64x2); impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16); impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8); diff --git a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs index 959e2d0cb68f5..b4ec6881c36ab 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs @@ -45,12 +45,50 @@ impl_simd_ext!(i64x4, i64); impl_simd_ext!(u64x2, u64); impl_simd_ext!(u64x4, u64); +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_abs(a: T) -> T { + let m: T = is::simd_lt(a, ls::simd_splat(0)); + is::simd_select(m, is::simd_neg(a), a) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_absd(a: T, b: T) -> T { + let m: T = is::simd_gt(a, b); + is::simd_select(m, is::simd_sub(a, b), is::simd_sub(b, a)) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_adda(a: T, b: T) -> T { + is::simd_add(ls::simd_abs(a), ls::simd_abs(b)) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_andn(a: T, b: T) -> T { is::simd_and(ls::simd_not(a), b) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_bitclr(a: T, b: T) -> T { + ls::simd_andn(ls::simd_shl(ls::simd_splat(1), b), a) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_bitrev(a: T, b: T) -> T { + is::simd_xor(ls::simd_shl(ls::simd_splat(1), b), a) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_bitset(a: T, b: T) -> T { + is::simd_or(ls::simd_shl(ls::simd_splat(1), b), a) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_fmsub(a: T, b: T, c: T) -> T { diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index 74fc2db13dcdc..ef434205b52a0 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -1097,6 +1097,9 @@ pub const fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { /// Zeroes the contents of all XMM or YMM registers. /// +/// This operation is purely a performance hint for the CPU and has no effect on the Abstract +/// Machine state. +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall) #[inline] #[target_feature(enable = "avx")] @@ -1109,6 +1112,9 @@ pub fn _mm256_zeroall() { /// Zeroes the upper 128 bits of all YMM registers; /// the lower 128-bits of the registers are unmodified. /// +/// This operation is purely a performance hint for the CPU and has no effect on the Abstract +/// Machine state. +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper) #[inline] #[target_feature(enable = "avx")] @@ -4007,13 +4013,11 @@ mod tests { } #[simd_test(enable = "avx")] - #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri fn test_mm256_zeroall() { _mm256_zeroall(); } #[simd_test(enable = "avx")] - #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri fn test_mm256_zeroupper() { _mm256_zeroupper(); } @@ -4484,7 +4488,7 @@ mod tests { } #[simd_test(enable = "avx")] - #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri + #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri fn test_mm256_stream_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let mut r = _mm256_undefined_si256(); @@ -4496,7 +4500,7 @@ mod tests { } #[simd_test(enable = "avx")] - #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri + #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri fn test_mm256_stream_pd() { #[repr(align(32))] struct Memory { @@ -4515,7 +4519,7 @@ mod tests { } #[simd_test(enable = "avx")] - #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri + #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri fn test_mm256_stream_ps() { #[repr(align(32))] struct Memory { diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 94c4269c8fe51..66ea63b674f12 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -58246,7 +58246,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri fn test_mm512_stream_ps() { #[repr(align(64))] struct Memory { @@ -58265,7 +58265,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri fn test_mm512_stream_pd() { #[repr(align(64))] struct Memory { @@ -58284,7 +58284,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - #[cfg_attr(miri, ignore)] + #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri fn test_mm512_stream_si512() { #[repr(align(64))] struct Memory { diff --git a/library/stdarch/crates/intrinsic-test/missing_x86.txt b/library/stdarch/crates/intrinsic-test/missing_x86.txt index f88a125bfdff8..c7aabb95a84b0 100644 --- a/library/stdarch/crates/intrinsic-test/missing_x86.txt +++ b/library/stdarch/crates/intrinsic-test/missing_x86.txt @@ -45,23 +45,35 @@ _mm_set1_pch _tpause _umwait -# IMM8 must be an even number in the range `0..=62` -_mm_sm3rnds2_epi32 - # SDE ERROR: Cannot execute XGETBV with ECX != 0 _xgetbv # top bits are undefined, unclear how to test these +_mm256_castph128_ph256 +_mm256_castps128_ps256 +_mm256_castpd128_pd256 _mm256_castsi128_si256 + +_mm512_castph128_ph512 +_mm512_castps128_ps512 +_mm512_castpd128_pd512 _mm512_castsi128_si512 + +_mm512_castph256_ph512 +_mm512_castps256_ps512 +_mm512_castpd256_pd512 _mm512_castsi256_si512 # Clang bug -_mm256_extract_epi16 -_mm256_extract_epi8 _mm512_mask_reduce_max_pd _mm512_mask_reduce_max_ps _mm512_mask_reduce_min_pd _mm512_mask_reduce_min_ps -_mm_extract_epi16 -_mm_extract_epi8 + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/library/stdarch/crates/intrinsic-test/src/arm/compile.rs b/library/stdarch/crates/intrinsic-test/src/arm/compile.rs deleted file mode 100644 index a672da2cc0d6b..0000000000000 --- a/library/stdarch/crates/intrinsic-test/src/arm/compile.rs +++ /dev/null @@ -1,51 +0,0 @@ -use crate::common::cli::ProcessedCli; -use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; - -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { - let cpp_compiler = config.cpp_compiler.as_ref()?; - - // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations - let mut command = CompilationCommandBuilder::new() - .add_arch_flags(["armv8.6-a", "crypto", "crc", "dotprod", "fp16"]) - .set_compiler(cpp_compiler) - .set_target(&config.target) - .set_opt_level("2") - .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) - .set_project_root("c_programs") - .add_extra_flags(["-ffp-contract=off", "-Wno-narrowing"]); - - if !config.target.contains("v7") { - command = command.add_arch_flags(["faminmax", "lut", "sha3", "fp8"]); - } - - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } - - let mut cpp_compiler = command.into_cpp_compilation(); - - if config.target.contains("aarch64_be") { - let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else { - panic!( - "target `{}` must specify `cxx_toolchain_dir`", - config.target - ) - }; - - cpp_compiler.command_mut().args([ - &format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"), - "--include-directory", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"), - "--include-directory", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"), - "-L", - &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"), - "-L", - &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"), - "-B", - &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"), - ]); - } - - Some(cpp_compiler) -} diff --git a/library/stdarch/crates/intrinsic-test/src/arm/config.rs b/library/stdarch/crates/intrinsic-test/src/arm/config.rs index 60bb0ca56cefa..87b8ebfa183e7 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/config.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/config.rs @@ -3,51 +3,6 @@ pub const NOTICE: &str = "\ // test are derived from a JSON specification, published under the same license as the // `intrinsic-test` crate.\n"; -pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" -#ifdef __aarch64__ -std::ostream& operator<<(std::ostream& os, poly128_t value); -#endif - -std::ostream& operator<<(std::ostream& os, float16_t value); -std::ostream& operator<<(std::ostream& os, uint8_t value); - -// T1 is the `To` type, T2 is the `From` type -template T1 cast(T2 x) { - static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); - T1 ret{}; - memcpy(&ret, &x, sizeof(T1)); - return ret; -} -"#; - -pub const PLATFORM_C_DEFINITIONS: &str = r#" -#ifdef __aarch64__ -std::ostream& operator<<(std::ostream& os, poly128_t value) { - std::stringstream temp; - do { - int n = value % 10; - value /= 10; - temp << n; - } while (value != 0); - std::string tempstr(temp.str()); - std::string res(tempstr.rbegin(), tempstr.rend()); - os << res; - return os; -} - -#endif - -std::ostream& operator<<(std::ostream& os, float16_t value) { - os << static_cast(value); - return os; -} - -std::ostream& operator<<(std::ostream& os, uint8_t value) { - os << (unsigned int) value; - return os; -} -"#; - pub const PLATFORM_RUST_DEFINITIONS: &str = ""; pub const PLATFORM_RUST_CFGS: &str = r#" @@ -61,7 +16,6 @@ pub const PLATFORM_RUST_CFGS: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] -#![feature(fmt_helpers_for_derive)] #![feature(stdarch_neon_f16)] #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] diff --git a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs index 99c8da854c506..9bf6c95ffdcb3 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs @@ -1,5 +1,4 @@ mod argument; -mod compile; mod config; mod intrinsic; mod json_parser; @@ -7,7 +6,6 @@ mod types; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::ArmIntrinsicType; @@ -15,16 +13,11 @@ use json_parser::get_neon_intrinsics; pub struct ArmArchitectureTest { intrinsics: Vec>, - cli_options: ProcessedCli, } impl SupportedArchitectureTest for ArmArchitectureTest { type IntrinsicImpl = ArmIntrinsicType; - fn cli_options(&self) -> &ProcessedCli { - &self.cli_options - } - fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } @@ -32,18 +25,16 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const NOTICE: &str = config::NOTICE; const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; - const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn cpp_compilation(&self) -> Option { - compile::build_cpp_compilation(&self.cli_options) + fn arch_flags(&self) -> Vec<&str> { + vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"] } fn create(cli_options: ProcessedCli) -> Self { - let a32 = cli_options.target.contains("v7"); + let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target) .expect("Error parsing input file"); @@ -68,9 +59,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .take(sample_size) .collect::>(); - Self { - intrinsics, - cli_options, - } + Self { intrinsics } } } diff --git a/library/stdarch/crates/intrinsic-test/src/arm/types.rs b/library/stdarch/crates/intrinsic-test/src/arm/types.rs index 18468bd5581f8..e9614eba218cb 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/types.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs @@ -1,6 +1,4 @@ use super::intrinsic::ArmIntrinsicType; -use crate::common::cli::Language; -use crate::common::indentation::Indentation; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; impl IntrinsicTypeDefinition for ArmIntrinsicType { @@ -8,8 +6,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { fn c_type(&self) -> String { let prefix = self.kind.c_prefix(); - if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) { - match (simd_len, vec_len) { + if let Some(bit_len) = self.bit_len { + match (self.simd_len, self.vec_len) { (None, None) => format!("{prefix}{bit_len}_t"), (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), @@ -20,19 +18,24 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { } } - fn c_single_vector_type(&self) -> String { - if let (Some(bit_len), Some(simd_len)) = (self.bit_len, self.simd_len) { - format!( - "{prefix}{bit_len}x{simd_len}_t", - prefix = self.kind.c_prefix() - ) + fn rust_type(&self) -> String { + let rust_prefix = self.kind.rust_prefix(); + let c_prefix = self.kind.c_prefix(); + + if let Some(bit_len) = self.bit_len { + match (self.simd_len, self.vec_len) { + (None, None) => format!("{rust_prefix}{bit_len}"), + (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"), + (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"), + (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case + } } else { - unreachable!("Shouldn't be called on this type") + todo!("{self:#?}") } } /// Determines the load function for this type. - fn get_load_function(&self, language: Language) -> String { + fn get_load_function(&self) -> String { if let IntrinsicType { kind: k, bit_len: Some(bl), @@ -47,16 +50,13 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { "" }; - let choose_workaround = language == Language::C && self.target.contains("v7"); format!( "vld{len}{quad}_{type}{size}", type = match k { TypeKind::Int(Sign::Unsigned) => "u", TypeKind::Int(Sign::Signed) => "s", TypeKind::Float => "f", - // The ACLE doesn't support 64-bit polynomial loads on Armv7 - // if armv7 and bl == 64, use "s", else "p" - TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"}, + TypeKind::Poly => "p", x => todo!("get_load_function TypeKind: {x:#?}"), }, size = bl, @@ -67,97 +67,6 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { todo!("get_load_function IntrinsicType: {self:#?}") } } - - /// Determines the get lane function for this type. - fn get_lane_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - simd_len, - .. - } = &self.data - { - let quad = if (simd_len.unwrap_or(1) * bl) > 64 { - "q" - } else { - "" - }; - format!( - "vget{quad}_lane_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - ) - } else { - todo!("get_lane_function IntrinsicType: {self:#?}") - } - } - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.num_vectors() > 1 { - (0..self.num_vectors()) - .map(|vector| { - format!( - r#""{ty}(" << {lanes} << ")""#, - ty = self.c_single_vector_type(), - lanes = (0..self.num_lanes()) - .map(move |idx| -> std::string::String { - let lane_fn = self.get_lane_function(); - let final_cast = self.generate_final_type_cast(); - format!( - "{final_cast}{lane_fn}(__return_value.val[{vector}], {idx})" - ) - }) - .collect::>() - .join(r#" << ", " << "#) - ) - }) - .collect::>() - .join(r#" << ", " << "#) - } else if self.num_lanes() > 1 { - (0..self.num_lanes()) - .map(|idx| -> std::string::String { - let lane_fn = self.get_lane_function(); - let final_cast = self.generate_final_type_cast(); - format!("{final_cast}{lane_fn}(__return_value, {idx})") - }) - .collect::>() - .join(r#" << ", " << "#) - } else { - format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.kind() { - TypeKind::Float if self.inner_size() == 16 => "float16_t".to_string(), - TypeKind::Float if self.inner_size() == 32 => "float".to_string(), - TypeKind::Float if self.inner_size() == 64 => "double".to_string(), - TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()), - TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()), - TypeKind::Poly => format!("poly{}_t", self.inner_size()), - ty => todo!("print_result_c - Unknown type: {ty:#?}"), - }, - promote = self.generate_final_type_cast(), - ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.is_simd() { - format!("{}(", self.c_type()) - } else { - String::from("") - }, - close = if self.is_simd() { ")" } else { "" }, - ) - } } impl ArmIntrinsicType { diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs index 413d5314d3180..885d5e998ef54 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -1,5 +1,9 @@ -use super::cli::Language; +use itertools::Itertools; + +use crate::common::intrinsic_helpers::TypeKind; + use super::constraint::Constraint; +use super::gen_rust::PASSES; use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -49,32 +53,19 @@ where self.constraint.is_some() } - /// The binding keyword (e.g. "const" or "let") for the array of possible test inputs. - fn rust_vals_array_binding(&self) -> impl std::fmt::Display { - if self.ty.is_rust_vals_array_const() { - "const" - } else { - "let" - } - } - /// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs. pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display { - if self.ty.is_rust_vals_array_const() { - let loads = crate::common::gen_rust::PASSES; - format!( - "{}_{ty}_{load_size}", - self.name.to_uppercase(), - ty = self.ty.rust_scalar_type(), - load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1, - ) - } else { - format!("{}_vals", self.name.to_lowercase()) - } + let loads = crate::common::gen_rust::PASSES; + format!( + "{ty}_{load_size}", + ty = self.ty.rust_scalar_type().to_uppercase(), + load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1, + ) } - fn as_call_param_c(&self) -> String { - self.ty.as_call_param_c(&self.generate_name()) + pub(crate) fn pass_by_ref(&self) -> bool { + // pass SIMD types and `f16` by reference + self.is_simd() || (self.ty.kind() == TypeKind::Float && self.ty.inner_size() == 16) } } @@ -87,13 +78,50 @@ impl ArgumentList where T: IntrinsicTypeDefinition, { - /// Converts the argument list into the call parameters for a C function call. - /// e.g. this would generate something like `a, &b, c` - pub fn as_call_param_c(&self) -> String { + pub fn as_non_imm_arglist_c(&self) -> String { self.iter() - .map(|arg| arg.as_call_param_c()) - .collect::>() - .join(", ") + .filter(|arg| !arg.has_constraint()) + .format_with("", |arg, fmt| { + if arg.pass_by_ref() { + fmt(&format_args!(", const {}* {}", arg.to_c_type(), arg.name)) + } else { + fmt(&format_args!(", {} {}", arg.to_c_type(), arg.name)) + } + }) + .to_string() + } + + pub fn as_non_imm_arglist_rust(&self) -> String { + self.iter() + .filter(|arg| !arg.has_constraint()) + .format_with("", |arg, fmt| { + if arg.pass_by_ref() { + fmt(&format_args!( + ", {}: *const {}", + arg.name, + arg.ty.rust_type() + )) + } else { + fmt(&format_args!(", {}: {}", arg.name, arg.ty.rust_type())) + } + }) + .to_string() + } + + pub fn as_call_params_c(&self, imm_args: &[i64]) -> String { + let mut imm_args = imm_args.iter(); + self.iter() + .format_with(", ", |arg, fmt| { + if arg.has_constraint() { + fmt(&imm_args.next().unwrap()) + } else { + if arg.pass_by_ref() { + fmt(&"*")?; + } + fmt(&arg.name) + } + }) + .to_string() } /// Converts the argument list into the call parameters for a Rust function. @@ -101,53 +129,21 @@ where pub fn as_call_param_rust(&self) -> String { self.iter() .filter(|a| !a.has_constraint()) - .map(|arg| arg.generate_name() + " as _") - .collect::>() + .map(|arg| arg.generate_name()) .join(", ") } - /// Creates a line for each argument that initializes an array for C from which `loads` argument - /// values can be loaded as a sliding window. - /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. - pub fn gen_arglists_c( - &self, - w: &mut impl std::io::Write, - indentation: Indentation, - loads: u32, - ) -> std::io::Result<()> { - for arg in self.iter().filter(|&arg| !arg.has_constraint()) { - // Setting the variables on an aligned boundary to make it easier to pick - // functions (of a specific architecture) that would help load the values. - writeln!( - w, - "{indentation}alignas(64) const {ty} {name}_vals[] = {values};", - ty = arg.ty.c_scalar_type(), - name = arg.generate_name(), - values = arg.ty.populate_random(indentation, loads, &Language::C) - )? - } - - Ok(()) - } - - /// Creates a line for each argument that initializes an array for Rust from which `loads` argument - /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];` - pub fn gen_arglists_rust( - &self, - w: &mut impl std::io::Write, - indentation: Indentation, - loads: u32, - ) -> std::io::Result<()> { - for arg in self.iter().filter(|&arg| !arg.has_constraint()) { - // Constants are defined globally. - if arg.ty.is_rust_vals_array_const() { - continue; - } - - Self::gen_arg_rust(arg, w, indentation, loads)?; - } - - Ok(()) + pub fn as_c_call_param_rust(&self) -> String { + self.iter() + .filter(|a| !a.has_constraint()) + .map(|arg| { + if arg.pass_by_ref() { + format!(", &raw const {}", arg.generate_name()) + } else { + format!(", {}", arg.generate_name()) + } + }) + .join("") } pub fn gen_arg_rust( @@ -158,39 +154,14 @@ where ) -> std::io::Result<()> { writeln!( w, - "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};\n", - bind = arg.rust_vals_array_binding(), + "{indentation}static {name}: [{ty}; {load_size}] = {values};\n", name = arg.rust_vals_array_name(), ty = arg.ty.rust_scalar_type(), load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, - values = arg.ty.populate_random(indentation, loads, &Language::Rust) + values = arg.ty.populate_random(indentation, loads) ) } - /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at - /// an offset `i` using a load intrinsic, in C. - /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);` - /// - /// ARM-specific - pub fn load_values_c(&self, indentation: Indentation) -> String { - self.iter() - .filter(|&arg| !arg.has_constraint()) - .enumerate() - .map(|(idx, arg)| { - format!( - "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i+{idx}]));\n", - ty = arg.to_c_type(), - name = arg.generate_name(), - load = if arg.is_simd() { - arg.ty.get_load_function(Language::C) - } else { - "*".to_string() - } - ) - }) - .collect() - } - /// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at /// an offset `i` using a load intrinsic, in Rust. /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));` @@ -199,17 +170,20 @@ where .filter(|&arg| !arg.has_constraint()) .enumerate() .map(|(idx, arg)| { - let load = if arg.is_simd() { - arg.ty.get_load_function(Language::Rust) + if arg.is_simd() { + format!( + "{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + name = arg.generate_name(), + vals_name = arg.rust_vals_array_name(), + load = arg.ty.get_load_function(), + ) } else { - "*".to_string() - }; - let typecast = if load.len() > 2 { "as _" } else { "" }; - format!( - "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i+{idx}){typecast});\n", - name = arg.generate_name(), - vals_name = arg.rust_vals_array_name(), - ) + format!( + "{indentation}let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n", + name = arg.generate_name(), + vals_name = arg.rust_vals_array_name(), + ) + } }) .collect() } diff --git a/library/stdarch/crates/intrinsic-test/src/common/cli.rs b/library/stdarch/crates/intrinsic-test/src/common/cli.rs index bed8259de8b6d..f407b5ceb7d48 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/cli.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/cli.rs @@ -1,12 +1,6 @@ use itertools::Itertools; use std::path::PathBuf; -#[derive(Debug, PartialEq)] -pub enum Language { - Rust, - C, -} - /// Intrinsic test tool #[derive(clap::Parser)] #[command( @@ -17,41 +11,13 @@ pub struct Cli { /// The input file containing the intrinsics pub input: PathBuf, - /// The rust toolchain to use for building the rust code - #[arg(long)] - pub toolchain: Option, - - /// The C++ compiler to use for compiling the c++ code - #[arg(long, default_value_t = String::from("clang++"))] - pub cppcompiler: String, - - /// Run the C programs under emulation with this command - #[arg(long)] - pub runner: Option, - /// Filename for a list of intrinsics to skip (one per line) #[arg(long)] pub skip: Option, - /// Regenerate test programs, but don't build or run them - #[arg(long)] - pub generate_only: bool, - /// Pass a target the test suite - #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))] - pub target: String, - - /// Pass a profile (release, dev) - #[arg(long, default_value_t = String::from("release"))] - pub profile: String, - - /// Set the linker - #[arg(long)] - pub linker: Option, - - /// Set the sysroot for the C++ compiler #[arg(long)] - pub cxx_toolchain_dir: Option, + pub target: String, #[arg(long, default_value_t = 100u8)] pub sample_percentage: u8, @@ -59,13 +25,7 @@ pub struct Cli { pub struct ProcessedCli { pub filename: PathBuf, - pub toolchain: Option, - pub cpp_compiler: Option, - pub runner: String, pub target: String, - pub profile: String, - pub linker: Option, - pub cxx_toolchain_dir: Option, pub skip: Vec, pub sample_percentage: u8, } @@ -73,11 +33,7 @@ pub struct ProcessedCli { impl ProcessedCli { pub fn new(cli_options: Cli) -> Self { let filename = cli_options.input; - let runner = cli_options.runner.unwrap_or_default(); let target = cli_options.target; - let profile = cli_options.profile; - let linker = cli_options.linker; - let cxx_toolchain_dir = cli_options.cxx_toolchain_dir; let sample_percentage = cli_options.sample_percentage; let skip = if let Some(filename) = cli_options.skip { @@ -91,27 +47,8 @@ impl ProcessedCli { Default::default() }; - let (toolchain, cpp_compiler) = if cli_options.generate_only { - (None, None) - } else { - ( - Some( - cli_options - .toolchain - .map_or_else(String::new, |t| format!("+{t}")), - ), - Some(cli_options.cppcompiler), - ) - }; - Self { - toolchain, - cpp_compiler, - runner, target, - profile, - linker, - cxx_toolchain_dir, skip, filename, sample_percentage, diff --git a/library/stdarch/crates/intrinsic-test/src/common/compare.rs b/library/stdarch/crates/intrinsic-test/src/common/compare.rs deleted file mode 100644 index c1438d1bbf8ce..0000000000000 --- a/library/stdarch/crates/intrinsic-test/src/common/compare.rs +++ /dev/null @@ -1,144 +0,0 @@ -use itertools::Itertools; -use rayon::prelude::*; -use std::{collections::HashMap, process::Command}; - -pub const INTRINSIC_DELIMITER: &str = "############"; -fn runner_command(runner: &str) -> Command { - let mut it = runner.split_whitespace(); - let mut cmd = Command::new(it.next().unwrap()); - cmd.args(it); - - cmd -} - -pub fn compare_outputs( - intrinsic_name_list: &Vec, - runner: &str, - target: &str, - profile: &str, -) -> bool { - let profile_dir = match profile { - "dev" => "debug", - _ => "release", - }; - - let (c, rust) = rayon::join( - || { - runner_command(runner) - .arg("./intrinsic-test-programs") - .current_dir("c_programs") - .output() - }, - || { - runner_command(runner) - .arg(format!( - "./target/{target}/{profile_dir}/intrinsic-test-programs" - )) - .current_dir("rust_programs") - .output() - }, - ); - let (c, rust) = match (c, rust) { - (Ok(c), Ok(rust)) => (c, rust), - failure => panic!("Failed to run: {failure:#?}"), - }; - - if !c.status.success() { - error!( - "Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), - ); - } - - if !rust.status.success() { - error!( - "Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), - ); - } - - info!("Completed running C++ and Rust test binaries"); - let c = std::str::from_utf8(&c.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - let rust = std::str::from_utf8(&rust.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - - let c_output_map = c - .split(INTRINSIC_DELIMITER) - .filter_map(|output| output.trim().split_once("\n")) - .collect::>(); - let rust_output_map = rust - .split(INTRINSIC_DELIMITER) - .filter_map(|output| output.trim().split_once("\n")) - .collect::>(); - - assert!(!c_output_map.is_empty(), "No C intrinsic output found!"); - - let intrinsics = c_output_map - .keys() - .chain(rust_output_map.keys()) - .unique() - .collect_vec(); - - info!("Comparing outputs"); - let intrinsics_diff_count = intrinsics - .par_iter() - .filter_map(|&&intrinsic| { - let c_output = c_output_map.get(intrinsic).unwrap(); - let rust_output = rust_output_map.get(intrinsic).unwrap(); - if rust_output.eq(c_output) { - None - } else { - let diff = diff::lines(c_output, rust_output); - let diffs = diff - .into_iter() - .filter_map(|diff| match diff { - diff::Result::Left(_) | diff::Result::Right(_) => Some(diff), - diff::Result::Both(_, _) => None, - }) - .collect_vec(); - if diffs.len() > 0 { - Some((intrinsic, diffs)) - } else { - None - } - } - }) - .inspect(|(intrinsic, diffs)| { - use std::io::Write; - - let stdout = std::io::stdout(); - let mut out = stdout.lock(); - - writeln!(out, "Difference for intrinsic: {intrinsic}").unwrap(); - diffs.into_iter().for_each(|diff| match diff { - diff::Result::Left(c) => { - writeln!(out, "C: {c}").unwrap(); - } - diff::Result::Right(rust) => { - writeln!(out, "Rust: {rust}").unwrap(); - } - _ => (), - }); - writeln!( - out, - "****************************************************************" - ) - .unwrap(); - }) - .count(); - - println!( - "{} differences found (tested {} intrinsics)", - intrinsics_diff_count, - intrinsic_name_list.len() - ); - - intrinsics_diff_count == 0 -} diff --git a/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs b/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs deleted file mode 100644 index fa78b332a7857..0000000000000 --- a/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs +++ /dev/null @@ -1,136 +0,0 @@ -#[derive(Clone)] -pub struct CompilationCommandBuilder { - compiler: String, - target: Option, - cxx_toolchain_dir: Option, - arch_flags: Vec, - optimization: String, - project_root: Option, - extra_flags: Vec, -} - -impl CompilationCommandBuilder { - pub fn new() -> Self { - Self { - compiler: String::new(), - target: None, - cxx_toolchain_dir: None, - arch_flags: Vec::new(), - optimization: "2".to_string(), - project_root: None, - extra_flags: Vec::new(), - } - } - - pub fn set_compiler(mut self, compiler: &str) -> Self { - self.compiler = compiler.to_string(); - self - } - - pub fn set_target(mut self, target: &str) -> Self { - self.target = Some(target.to_string()); - self - } - - pub fn set_cxx_toolchain_dir(mut self, path: Option<&str>) -> Self { - self.cxx_toolchain_dir = path.map(|p| p.to_string()); - self - } - - pub fn add_arch_flags<'a>(mut self, flags: impl IntoIterator) -> Self { - self.arch_flags - .extend(flags.into_iter().map(|s| s.to_owned())); - - self - } - - pub fn set_opt_level(mut self, optimization: &str) -> Self { - self.optimization = optimization.to_string(); - self - } - - /// Sets the root path of all the generated test files. - pub fn set_project_root(mut self, path: &str) -> Self { - self.project_root = Some(path.to_string()); - self - } - - pub fn add_extra_flags<'a>(mut self, flags: impl IntoIterator) -> Self { - self.extra_flags - .extend(flags.into_iter().map(|s| s.to_owned())); - - self - } - - pub fn add_extra_flag(self, flag: &str) -> Self { - self.add_extra_flags([flag]) - } -} - -impl CompilationCommandBuilder { - pub fn into_cpp_compilation(self) -> CppCompilation { - let mut cpp_compiler = std::process::Command::new(self.compiler); - - if let Some(project_root) = self.project_root { - cpp_compiler.current_dir(project_root); - } - - let flags = std::env::var("CPPFLAGS").unwrap_or("".into()); - cpp_compiler.args(flags.split_whitespace()); - - cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+"))); - - cpp_compiler.arg(format!("-O{}", self.optimization)); - - cpp_compiler.args(self.extra_flags); - - if let Some(target) = &self.target { - cpp_compiler.arg(format!("--target={target}")); - } - - CppCompilation(cpp_compiler) - } -} - -pub struct CppCompilation(std::process::Command); - -fn clone_command(command: &std::process::Command) -> std::process::Command { - let mut cmd = std::process::Command::new(command.get_program()); - if let Some(current_dir) = command.get_current_dir() { - cmd.current_dir(current_dir); - } - cmd.args(command.get_args()); - - for (key, val) in command.get_envs() { - cmd.env(key, val.unwrap_or_default()); - } - - cmd -} - -impl CppCompilation { - pub fn command_mut(&mut self) -> &mut std::process::Command { - &mut self.0 - } - - pub fn compile_object_file( - &self, - input: &str, - output: &str, - ) -> std::io::Result { - let mut cmd = clone_command(&self.0); - cmd.args([input, "-v", "-c", "-o", output]); - cmd.output() - } - - pub fn link_executable( - &self, - inputs: impl Iterator, - output: &str, - ) -> std::io::Result { - let mut cmd = clone_command(&self.0); - cmd.args(inputs); - cmd.args(["-o", output]); - cmd.output() - } -} diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs index 12b5a600d5aef..bdf6f68d58cc2 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs @@ -1,173 +1,42 @@ +use itertools::Itertools; + use crate::common::intrinsic::Intrinsic; -use super::argument::Argument; -use super::compare::INTRINSIC_DELIMITER; -use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; -// The number of times each intrinsic will be called. -const PASSES: u32 = 20; -const COMMON_HEADERS: [&str; 7] = [ - "iostream", - "string", - "cstring", - "iomanip", - "sstream", - "type_traits", - "cassert", -]; - -pub fn generate_c_test_loop( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, - indentation: Indentation, - additional: &str, - passes: u32, -) -> std::io::Result<()> { - let body_indentation = indentation.nested(); - // Successive arguments are offset increasingly from their value array start - let passes = passes + 1 - - intrinsic - .arguments - .iter() - .filter(|&arg| !arg.has_constraint()) - .count() as u32; - writeln!( - w, - "{indentation}for (int i=0; i<{passes}; i++) {{\n\ - {loaded_args}\ - {body_indentation}auto __return_value = {intrinsic_call}({args});\n\ - {print_result}\n\ - {indentation}}}", - loaded_args = intrinsic.arguments.load_values_c(body_indentation), - intrinsic_call = intrinsic.name, - args = intrinsic.arguments.as_call_param_c(), - print_result = intrinsic - .results - .print_result_c(body_indentation, additional) - ) -} - -pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, - indentation: Indentation, - constraints: &mut (impl Iterator> + Clone), - name: String, -) -> std::io::Result<()> { - let Some(current) = constraints.next() else { - return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES); - }; - - let body_indentation = indentation.nested(); - for i in current.constraint.iter().flat_map(|c| c.iter()) { - let ty = current.ty.c_type(); - - writeln!(w, "{indentation}{{")?; - - // TODO: Move to actually specifying the enum value - // instead of typecasting integers, for better clarity - // of generated code. - writeln!( - w, - "{body_indentation}const {ty} {} = ({ty}){i};", - current.generate_name() - )?; - - generate_c_constraint_blocks( - w, - intrinsic, - body_indentation, - &mut constraints.clone(), - format!("{name}-{i}"), - )?; - - writeln!(w, "{indentation}}}")?; - } - - Ok(()) -} - -// Compiles C test programs using specified compiler -pub fn create_c_test_function( - w: &mut impl std::io::Write, - intrinsic: &Intrinsic, -) -> std::io::Result<()> { - let indentation = Indentation::default(); - - writeln!(w, "int run_{}() {{", intrinsic.name)?; - - // Define the arrays of arguments. - let arguments = &intrinsic.arguments; - arguments.gen_arglists_c(w, indentation.nested(), PASSES)?; - - generate_c_constraint_blocks( - w, - intrinsic, - indentation.nested(), - &mut arguments.iter().rev().filter(|&i| i.has_constraint()), - Default::default(), - )?; - - writeln!(w, " return 0;")?; - writeln!(w, "}}")?; - - Ok(()) -} - -pub fn write_mod_cpp( +pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, platform_headers: &[&str], - forward_declarations: &str, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; - for header in COMMON_HEADERS.iter().chain(platform_headers.iter()) { - writeln!(w, "#include <{header}>")?; - } - - writeln!(w, "{}", forward_declarations)?; - - for intrinsic in intrinsics { - create_c_test_function(w, intrinsic)?; - } - - Ok(()) -} + writeln!(w, "#include ")?; + writeln!(w, "#include ")?; -pub fn write_main_cpp<'a>( - w: &mut impl std::io::Write, - arch_specific_definitions: &str, - arch_specific_headers: &[&str], - intrinsics: impl Iterator + Clone, -) -> std::io::Result<()> { - for header in COMMON_HEADERS.iter().chain(arch_specific_headers.iter()) { + for header in platform_headers { writeln!(w, "#include <{header}>")?; } - // NOTE: It's assumed that this value contains the required `ifdef`s. - writeln!(w, "{arch_specific_definitions }")?; - - for intrinsic in intrinsics.clone() { - writeln!(w, "extern int run_{intrinsic}(void);")?; - } - - writeln!(w, "int main(int argc, char **argv) {{")?; - for intrinsic in intrinsics { - writeln!( - w, - " std::cout << \"{INTRINSIC_DELIMITER}\" << std::endl;" - )?; - writeln!(w, " std::cout << \"{intrinsic}\" << std::endl;")?; - writeln!(w, " run_{intrinsic}();\n")?; + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " +void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ + *__dst = {name}({params}); +}}", + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + ) + })?; } - writeln!(w, " return 0;")?; - - writeln!(w, "}}")?; - Ok(()) } diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index 5a0bc7b4d4b97..d23710451d478 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,23 +1,54 @@ use itertools::Itertools; -use std::process::Command; -use super::compare::INTRINSIC_DELIMITER; use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; use crate::common::argument::ArgumentList; use crate::common::intrinsic::Intrinsic; +use crate::common::intrinsic_helpers::TypeKind; // The number of times each intrinsic will be called. pub(crate) const PASSES: u32 = 20; +// we need a reflexive equality relation, so treat NaNs as equal +const COMMON_RUST_DEFINITIONS: &str = r#" +macro_rules! wrap_partialeq { + ($($wrapper:ident ($inner:ty)),*) => {$( + #[derive(Debug, Copy, Clone)] + #[repr(transparent)] + pub struct $wrapper($inner); + + impl PartialEq for $wrapper { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 || (self.0.is_nan() && other.0.is_nan()) + } + } + + impl Eq for $wrapper {} + )*} +} + +wrap_partialeq!(NanEqF16(f16), NanEqF32(f32), NanEqF64(f64)); +"#; + macro_rules! concatln { ($($lines:expr),* $(,)?) => { concat!($( $lines, "\n" ),*) }; } -fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { - writeln!( +pub fn write_bin_cargo_toml( + w: &mut impl std::io::Write, + module_count: usize, +) -> std::io::Result<()> { + write!(w, concatln!("[workspace]", "members = ["))?; + for i in 0..module_count { + writeln!(w, " \"mod_{i}\",")?; + } + writeln!(w, "]") +} + +pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { + write!( w, concatln!( "[package]", @@ -26,6 +57,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: "authors = [{authors}]", "license = \"{license}\"", "edition = \"2018\"", + "", + "[dependencies]", + "core_arch = {{ path = \"../../crates/core_arch\" }}", + "", + "[build-dependencies]", + "cc = \"1\"" ), name = name, version = env!("CARGO_PKG_VERSION"), @@ -36,72 +73,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: ) } -pub fn write_bin_cargo_toml( - w: &mut impl std::io::Write, - module_count: usize, -) -> std::io::Result<()> { - write_cargo_toml_header(w, "intrinsic-test-programs")?; - - writeln!(w, "[dependencies]")?; - writeln!(w, "core_arch = {{ path = \"../crates/core_arch\" }}")?; - - for i in 0..module_count { - writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?; - } - - Ok(()) -} - -pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { - write_cargo_toml_header(w, name)?; - - writeln!(w, "[dependencies]")?; - writeln!(w, "core_arch = {{ path = \"../../crates/core_arch\" }}")?; - - Ok(()) -} - -pub fn write_main_rs<'a>( - w: &mut impl std::io::Write, - chunk_count: usize, - cfg: &str, - definitions: &str, - intrinsics: impl Iterator + Clone, -) -> std::io::Result<()> { - writeln!(w, "#![feature(simd_ffi)]")?; - writeln!(w, "#![feature(f16)]")?; - writeln!(w, "#![allow(unused)]")?; - - // Cargo will spam the logs if these warnings are not silenced. - writeln!(w, "#![allow(non_upper_case_globals)]")?; - writeln!(w, "#![allow(non_camel_case_types)]")?; - writeln!(w, "#![allow(non_snake_case)]")?; - - writeln!(w, "{cfg}")?; - writeln!(w, "{definitions}")?; - - for module in 0..chunk_count { - writeln!(w, "use mod_{module}::*;")?; - } - - writeln!(w, "fn main() {{")?; - - for binary in intrinsics { - writeln!(w, " println!(\"{INTRINSIC_DELIMITER}\");")?; - writeln!(w, " println!(\"{binary}\");")?; - writeln!(w, " run_{binary}();\n")?; - } - - writeln!(w, "}}")?; - - Ok(()) -} - pub fn write_lib_rs( w: &mut impl std::io::Write, notice: &str, cfg: &str, definitions: &str, + i: usize, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; @@ -117,13 +94,15 @@ pub fn write_lib_rs( writeln!(w, "{cfg}")?; + writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?; + writeln!(w, "{definitions}")?; let mut seen = std::collections::HashSet::new(); for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { - if !arg.has_constraint() && arg.ty.is_rust_vals_array_const() { + if !arg.has_constraint() { let name = arg.rust_vals_array_name().to_string(); if seen.insert(name) { @@ -133,196 +112,189 @@ pub fn write_lib_rs( } } + write_bindings_rust(w, i, intrinsics)?; + for intrinsic in intrinsics { - crate::common::gen_rust::create_rust_test_module(w, intrinsic)?; + create_rust_test(w, intrinsic)?; } Ok(()) } -pub fn compile_rust_programs( - toolchain: Option<&str>, - target: &str, - profile: &str, - linker: Option<&str>, -) -> bool { - /* If there has been a linker explicitly set from the command line then - * we want to set it via setting it in the RUSTFLAGS*/ - - // This is done because `toolchain` is None when - // the --generate-only flag is passed - if toolchain.is_none() { - return true; - } - - trace!("Building cargo command"); - - let mut cargo_command = Command::new("cargo"); - cargo_command.current_dir("rust_programs"); - - // Do not use the target directory of the workspace please. - cargo_command.env("CARGO_TARGET_DIR", "target"); - - if toolchain.is_some_and(|val| !val.is_empty()) { - cargo_command.arg(toolchain.unwrap()); - } - cargo_command.args(["build", "--target", target, "--profile", profile]); - - let mut rust_flags = "-Cdebuginfo=0".to_string(); - if let Some(linker) = linker { - rust_flags.push_str(" -C linker="); - rust_flags.push_str(linker); - rust_flags.push_str(" -C link-args=-static"); - - cargo_command.env("CPPFLAGS", "-fuse-ld=lld"); - } - - cargo_command.env("RUSTFLAGS", rust_flags); - - trace!("running cargo"); - - if log::log_enabled!(log::Level::Trace) { - cargo_command.stdout(std::process::Stdio::inherit()); - cargo_command.stderr(std::process::Stdio::inherit()); - } - - let output = cargo_command.output(); - trace!("cargo is done"); - - if let Ok(output) = output { - if output.status.success() { - true - } else { - error!( - "Failed to compile code for rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}", - std::str::from_utf8(&output.stdout).unwrap_or(""), - std::str::from_utf8(&output.stderr).unwrap_or("") - ); - false - } - } else { - error!("Command failed: {output:#?}"); - false - } -} - -pub fn generate_rust_test_loop( +fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, - indentation: Indentation, - specializations: &[Vec], passes: u32, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; - let passes = passes + 1 - - intrinsic - .arguments - .iter() - .filter(|&arg| !arg.has_constraint()) - .count() as u32; // Each function (and each specialization) has its own type. Erase that type with a cast. - let mut coerce = String::from("unsafe fn("); + let mut coerce = String::from("fn("); + let mut c_coerce = String::from("fn(_, "); for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) { coerce += "_, "; + c_coerce += "_, "; } coerce += ") -> _"; - - match specializations { - [] => { - writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; - } - [const_args] if const_args.is_empty() => { - writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; - } - _ => { - writeln!(w, " let specializations = [")?; - - for specialization in specializations { - let mut specialization: Vec<_> = - specialization.iter().map(|d| d.to_string()).collect(); - - let const_args = specialization.join(","); - - // The identifier is reversed. - specialization.reverse(); - let id = specialization.join("-"); - - writeln!( - w, - " (\"-{id}\", {intrinsic_name}::<{const_args}> as {coerce})," - )?; - } - - writeln!(w, " ];")?; - } + c_coerce += ")"; + + if intrinsic + .arguments + .iter() + .filter(|arg| arg.has_constraint()) + .count() + == 0 + { + writeln!( + w, + " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" + )?; + } else { + writeln!(w, " let specializations = [")?; + + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", + const_args = imm_values.iter().join(","), + c_const_args = imm_values.iter().join("_"), + ) + })?; + + writeln!(w, " ];")?; } + let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() { + ( + format!( + "std::mem::transmute::<_, [{}; {}]>(", + intrinsic.results.rust_scalar_type().replace("f", "NanEqF"), + intrinsic.results.num_lanes() * intrinsic.results.num_vectors() + ), + ")", + ) + } else if intrinsic.results.kind == TypeKind::Float { + ( + match intrinsic.results.inner_size() { + 16 => format!("NanEqF16("), + 32 => format!("NanEqF32("), + 64 => format!("NanEqF64("), + _ => unimplemented!(), + }, + ")", + ) + } else { + ("".to_string(), "") + }; + write!( w, concatln!( - " for (id, f) in specializations {{", + " for (id, rust, c) in specializations {{", " for i in 0..{passes} {{", " unsafe {{", "{loaded_args}", - " let __return_value = f({args});", - " println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});", + " let __rust_return_value = rust({rust_args});", + "", + " let mut __c_return_value = std::mem::MaybeUninit::uninit();", + " c(__c_return_value.as_mut_ptr(){c_args});", + " let __c_return_value = __c_return_value.assume_init();", + "", + " assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");", " }}", " }}", " }}", ), - loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)), - args = intrinsic.arguments.as_call_param_rust(), - return_value = intrinsic.results.print_result_rust(), + loaded_args = intrinsic + .arguments + .load_values_rust(Indentation::default().nest_by(4)), + rust_args = intrinsic.arguments.as_call_param_rust(), + c_args = intrinsic.arguments.as_c_call_param_rust(), passes = passes, + cast_prefix = cast_prefix, + cast_suffix = cast_suffix, ) } -/// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic. -fn generate_rust_specializations( - constraints: &mut impl Iterator>, -) -> Vec> { - let mut specializations = vec![vec![]]; - - for constraint in constraints { - specializations = constraint - .flat_map(|right| { - specializations.iter().map(move |left| { - let mut left = left.clone(); - left.push(i32::try_from(right).unwrap()); - left - }) - }) - .collect(); - } +fn create_rust_test( + w: &mut impl std::io::Write, + intrinsic: &Intrinsic, +) -> std::io::Result<()> { + trace!("generating `{}`", intrinsic.name); + + write!( + w, + concatln!("#[test]", "fn test_{intrinsic_name}() {{"), + intrinsic_name = intrinsic.name, + )?; + + generate_rust_test_loop(w, intrinsic, PASSES)?; + + writeln!(w, "}}")?; - specializations + Ok(()) } -// Top-level function to create complete test program -pub fn create_rust_test_module( +pub fn write_bindings_rust( w: &mut impl std::io::Write, - intrinsic: &Intrinsic, + i: usize, + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - trace!("generating `{}`", intrinsic.name); - let indentation = Indentation::default(); + write!( + w, + concatln!( + "#[allow(improper_ctypes)]", + "#[link(name = \"wrapper_{i}\")]", + "unsafe extern \"C\" {{" + ), + i = i + )?; - writeln!(w, "pub fn run_{}() {{", intrinsic.name)?; + for intrinsic in intrinsics { + intrinsic.iter_specializations(|imm_values| { + writeln!( + w, + " fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + ) + })?; + } - // Define the arrays of arguments. - let arguments = &intrinsic.arguments; - arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?; + writeln!(w, "}}") +} - // Define any const generics as `const` items, then generate the actual test loop. - let specializations = generate_rust_specializations( - &mut arguments - .iter() - .filter_map(|i| i.constraint.as_ref().map(|v| v.iter())), - ); +pub fn write_build_rs( + w: &mut impl std::io::Write, + i: usize, + arch_flags: &[&str], +) -> std::io::Result<()> { + const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-ffp-model=strict", "-Wno-narrowing"]; - generate_rust_test_loop(w, intrinsic, indentation, &specializations, PASSES)?; + write!( + w, + concatln!( + "fn main() {{", + " cc::Build::new()", + " .file(\"../../c_programs/wrapper_{i}.c\")", + " .opt_level(2)", + " .flags(&[", + ), + i = i + )?; - writeln!(w, "}}")?; + let indentation = Indentation::default().nest_by(2); + for flag in COMMON_FLAGS.iter().chain(arch_flags) { + writeln!(w, "{indentation}\"{flag}\",")?; + } - Ok(()) + write!( + w, + concatln!(" ])", " .compile(\"wrapper_{i}\");", "}}"), + i = i + ) } diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs index 81f6d6d8b5b2c..76e5959153d07 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,3 +1,5 @@ +use crate::common::constraint::Constraint; + use super::argument::ArgumentList; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -16,3 +18,36 @@ pub struct Intrinsic { /// Any architecture-specific tags. pub arch_tags: Vec, } + +fn recurse_specializations<'a, E>( + constraints: &mut (impl Iterator + Clone), + imm_values: &mut Vec, + f: &mut impl FnMut(&[i64]) -> Result<(), E>, +) -> Result<(), E> { + if let Some(current) = constraints.next() { + for i in current.iter() { + imm_values.push(i); + recurse_specializations(&mut constraints.clone(), imm_values, f)?; + imm_values.pop(); + } + Ok(()) + } else { + f(&imm_values) + } +} + +impl Intrinsic { + pub fn iter_specializations( + &self, + mut f: impl FnMut(&[i64]) -> Result<(), E>, + ) -> Result<(), E> { + recurse_specializations( + &mut self + .arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()), + &mut Vec::new(), + &mut f, + ) + } +} diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index a14d7ef05f539..ab4a565200bc8 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -5,7 +5,6 @@ use std::str::FromStr; use itertools::Itertools as _; -use super::cli::Language; use super::indentation::Indentation; use super::values::value_for_array; @@ -94,6 +93,7 @@ impl TypeKind { Self::Poly => "u", Self::Char(Sign::Unsigned) => "u", Self::Char(Sign::Signed) => "i", + Self::Mask => "u", _ => unreachable!("Unused type kind: {self:#?}"), } } @@ -154,67 +154,7 @@ impl IntrinsicType { self.ptr } - pub fn c_scalar_type(&self) -> String { - match self.kind() { - TypeKind::Char(_) => String::from("char"), - TypeKind::Vector => String::from("int32_t"), - _ => format!( - "{prefix}{bits}_t", - prefix = self.kind().c_prefix(), - bits = self.inner_size() - ), - } - } - - pub fn c_promotion(&self) -> &str { - match *self { - IntrinsicType { - kind, - bit_len: Some(8), - .. - } => match kind { - TypeKind::Int(Sign::Signed) => "int", - TypeKind::Int(Sign::Unsigned) => "unsigned int", - TypeKind::Poly => "uint8_t", - _ => "", - }, - IntrinsicType { - kind: TypeKind::Poly, - bit_len: Some(bit_len), - .. - } => match bit_len { - 8 => unreachable!("handled above"), - 16 => "uint16_t", - 32 => "uint32_t", - 64 => "uint64_t", - 128 => "", - _ => panic!("invalid bit_len"), - }, - IntrinsicType { - kind: TypeKind::Float, - bit_len: Some(bit_len), - .. - } => match bit_len { - 16 => "float16_t", - 32 => "float", - 64 => "double", - 128 => "", - _ => panic!("invalid bit_len"), - }, - IntrinsicType { - kind: TypeKind::Char(_), - .. - } => "char", - _ => "", - } - } - - pub fn populate_random( - &self, - indentation: Indentation, - loads: u32, - language: &Language, - ) -> String { + pub fn populate_random(&self, indentation: Indentation, loads: u32) -> String { match self { IntrinsicType { bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)), @@ -224,13 +164,9 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, suffix) = match language { - Language::Rust => ('[', ']'), - Language::C => ('{', '}'), - }; let body_indentation = indentation.nested(); format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| { let src = value_for_array(*bit_len, i); @@ -241,13 +177,7 @@ impl IntrinsicType { let mask = !0u64 >> (64 - *bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - if (twos_compl == src) && (language == &Language::C) { - // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid - // undefined literal overflow behaviour. - fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) - } else { - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) - } + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) } else { fmt(&format_args!("{body_indentation}{src:#x}")) } @@ -261,20 +191,11 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) { - (&Language::Rust, 16) => ('[', "f16::from_bits(", ")", ']'), - (&Language::Rust, 32) => ('[', "f32::from_bits(", ")", ']'), - (&Language::Rust, 64) => ('[', "f64::from_bits(", ")", ']'), - (&Language::C, 16) => ('{', "cast(", ")", '}'), - (&Language::C, 32) => ('{', "cast(", ")", '}'), - (&Language::C, 64) => ('{', "cast(", ")", '}'), - _ => unreachable!(), - }; format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| fmt(&format_args!( - "{indentation}{cast_prefix}{src:#x}{cast_suffix}", + "{indentation}f{bit_len}::from_bits({src:#x})", indentation = indentation.nested(), src = value_for_array(*bit_len, i) ))) @@ -287,14 +208,10 @@ impl IntrinsicType { vec_len, .. } => { - let (prefix, suffix) = match language { - Language::Rust => ('[', ']'), - Language::C => ('{', '}'), - }; let body_indentation = indentation.nested(); let effective_bit_len = 32; format!( - "{prefix}\n{body}\n{indentation}{suffix}", + "[\n{body}\n{indentation}]", body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1)) .format_with(",\n", |i, fmt| { let src = value_for_array(effective_bit_len, i); @@ -304,13 +221,7 @@ impl IntrinsicType { let mask = !0u64 >> (64 - effective_bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - if (twos_compl == src) && (language == &Language::C) { - // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid - // undefined literal overflow behaviour. - fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1")) - } else { - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) - } + fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) } else { fmt(&format_args!("{body_indentation}{src:#x}")) } @@ -320,73 +231,31 @@ impl IntrinsicType { _ => unimplemented!("populate random: {self:#?}"), } } - - pub fn is_rust_vals_array_const(&self) -> bool { - match self { - // Floats have to be loaded at runtime for stable NaN conversion. - IntrinsicType { - kind: TypeKind::Float, - .. - } => false, - IntrinsicType { - kind: TypeKind::Int(_) | TypeKind::Poly, - .. - } => true, - _ => true, - } - } - - pub fn as_call_param_c(&self, name: &String) -> String { - if self.ptr { - format!("&{name}") - } else { - name.clone() - } - } } pub trait IntrinsicTypeDefinition: Deref { /// Determines the load function for this type. /// can be implemented in an `impl` block - fn get_load_function(&self, _language: Language) -> String; - - /// can be implemented in an `impl` block - fn get_lane_function(&self) -> String; + fn get_load_function(&self) -> String; /// Gets a string containing the typename for this type in C format. /// can be directly defined in `impl` blocks fn c_type(&self) -> String; + /// Gets a string containing the typename for this type in Rust format. /// can be directly defined in `impl` blocks - fn c_single_vector_type(&self) -> String; - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String; - - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_rust(&self) -> String { - String::from("format_args!(\"{__return_value:.150?}\")") - } + fn rust_type(&self) -> String; /// To enable architecture-specific logic fn rust_scalar_type(&self) -> String { - format!( - "{prefix}{bits}", - prefix = self.kind().rust_prefix(), - bits = self.inner_size() - ) - } - - fn generate_final_type_cast(&self) -> String { - let type_data = self.c_promotion(); - if type_data.len() > 2 { - format!("({type_data})") + if self.is_simd() { + format!( + "{prefix}{bits}", + prefix = self.kind().rust_prefix(), + bits = self.inner_size() + ) } else { - String::new() + self.rust_type() } } } diff --git a/library/stdarch/crates/intrinsic-test/src/common/mod.rs b/library/stdarch/crates/intrinsic-test/src/common/mod.rs index a1062b3a87dbc..86849f7db34e0 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -1,38 +1,32 @@ -use std::fs::File; +use std::{fs::File, io}; use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ - compile_c::CppCompilation, - gen_c::{write_main_cpp, write_mod_cpp}, - gen_rust::{ - compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs, - write_main_rs, - }, + gen_c::write_wrapper_c, + gen_rust::{write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs}, intrinsic::Intrinsic, intrinsic_helpers::IntrinsicTypeDefinition, }; pub mod argument; pub mod cli; -pub mod compare; -pub mod compile_c; pub mod constraint; -pub mod gen_c; -pub mod gen_rust; -pub mod indentation; pub mod intrinsic; pub mod intrinsic_helpers; -pub mod values; + +mod gen_c; +mod gen_rust; +mod indentation; +mod values; /// Architectures must support this trait /// to be successfully tested. pub trait SupportedArchitectureTest { type IntrinsicImpl: IntrinsicTypeDefinition + Sync; - fn cli_options(&self) -> &ProcessedCli; fn intrinsics(&self) -> &[Intrinsic]; fn create(cli_options: ProcessedCli) -> Self; @@ -40,118 +34,40 @@ pub trait SupportedArchitectureTest { const NOTICE: &str; const PLATFORM_C_HEADERS: &[&str]; - const PLATFORM_C_DEFINITIONS: &str; - const PLATFORM_C_FORWARD_DECLARATIONS: &str; const PLATFORM_RUST_CFGS: &str; const PLATFORM_RUST_DEFINITIONS: &str; - fn cpp_compilation(&self) -> Option; - - fn build_c_file(&self) -> bool { - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + fn arch_flags(&self) -> Vec<&str>; - let cpp_compiler_wrapped = self.cpp_compilation(); + fn generate_c_file(&self) { + let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); std::fs::create_dir_all("c_programs").unwrap(); self.intrinsics() .par_chunks(chunk_size) .enumerate() .map(|(i, chunk)| { - let c_filename = format!("c_programs/mod_{i}.cpp"); + let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - let mod_file_write_result = write_mod_cpp( - &mut file, - Self::NOTICE, - Self::PLATFORM_C_HEADERS, - Self::PLATFORM_C_FORWARD_DECLARATIONS, - chunk, - ); - - if let Err(error) = mod_file_write_result { - return Err(format!("Error writing to mod_{i}.cpp: {error:?}")); - } - - // compile this cpp file into a .o file. - // - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - trace!("compiling mod_{i}.cpp"); - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - let compile_output = cpp_compiler - .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o")) - .map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?; - - assert!( - compile_output.status.success(), - "{}", - String::from_utf8_lossy(&compile_output.stderr) - ); - - trace!("finished compiling mod_{i}.cpp"); - } - Ok(()) + write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) }) - .collect::>() + .collect::>() .unwrap(); - - let mut file = File::create("c_programs/main.cpp").unwrap(); - write_main_cpp( - &mut file, - Self::PLATFORM_C_DEFINITIONS, - Self::PLATFORM_C_HEADERS, - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed - if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - // compile this cpp file into a .o file - trace!("compiling main.cpp"); - let output = cpp_compiler - .compile_object_file("main.cpp", "intrinsic-test-programs.o") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - - let object_files = (0..chunk_count) - .map(|i| format!("mod_{i}.o")) - .chain(["intrinsic-test-programs.o".to_owned()]); - - let output = cpp_compiler - .link_executable(object_files, "intrinsic-test-programs") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - } - - true } - fn build_rust_file(&self) -> bool { - std::fs::create_dir_all("rust_programs/src").unwrap(); + fn generate_rust_file(&self) { + let arch_flags = self.arch_flags(); - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + std::fs::create_dir_all("rust_programs").unwrap(); + + let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len()); let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); - let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); - write_main_rs( - &mut main_rs, - chunk_count, - Self::PLATFORM_RUST_CFGS, - "", - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - let target = &self.cli_options().target; - let profile = &self.cli_options().profile; - let toolchain = self.cli_options().toolchain.as_deref(); - let linker = self.cli_options().linker.as_deref(); - self.intrinsics() - .par_chunks(chunk_size) + .chunks(chunk_size) .enumerate() .map(|(i, chunk)| { std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?; @@ -165,6 +81,7 @@ pub trait SupportedArchitectureTest { Self::NOTICE, Self::PLATFORM_RUST_CFGS, Self::PLATFORM_RUST_DEFINITIONS, + i, chunk, )?; @@ -174,41 +91,20 @@ pub trait SupportedArchitectureTest { write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?; + let build_rs_filename = format!("rust_programs/mod_{i}/build.rs"); + trace!("generating `{build_rs_filename}`"); + let mut file = File::create(build_rs_filename).unwrap(); + + write_build_rs(&mut file, i, &arch_flags).unwrap(); + Ok(()) }) .collect::>() .unwrap(); - - compile_rust_programs(toolchain, target, profile, linker) - } - - fn compare_outputs(&self) -> bool { - if self.cli_options().toolchain.is_some() { - let intrinsics_name_list = self - .intrinsics() - .iter() - .map(|i| i.name.clone()) - .collect::>(); - - compare::compare_outputs( - &intrinsics_name_list, - &self.cli_options().runner, - &self.cli_options().target, - &self.cli_options().profile, - ) - } else { - true - } } } -// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) { -// let available_parallelism = std::thread::available_parallelism().unwrap().get(); -// let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count)); - -// (chunk_size, intrinsic_count.div_ceil(chunk_size)) -// } - -pub fn manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) { - (chunk_size, intrinsic_count.div_ceil(chunk_size)) +pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { + let ncores = std::thread::available_parallelism().unwrap().into(); + (intrinsic_count.div_ceil(ncores), ncores) } diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs index e5c846877ce6d..9f57c99f12cf5 100644 --- a/library/stdarch/crates/intrinsic-test/src/main.rs +++ b/library/stdarch/crates/intrinsic-test/src/main.rs @@ -15,27 +15,21 @@ fn main() { let args: Cli = clap::Parser::parse(); let processed_cli_options = ProcessedCli::new(args); - match processed_cli_options.target.as_str() { - "aarch64-unknown-linux-gnu" - | "armv7-unknown-linux-gnueabihf" - | "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)), - - "x86_64-unknown-linux-gnu" => run(X86ArchitectureTest::create(processed_cli_options)), - _ => std::process::exit(0), + if processed_cli_options.target.starts_with("arm") + | processed_cli_options.target.starts_with("aarch64") + { + run(ArmArchitectureTest::create(processed_cli_options)) + } else if processed_cli_options.target.starts_with("x86") { + run(X86ArchitectureTest::create(processed_cli_options)) + } else { + unimplemented!("Unsupported target {}", processed_cli_options.target) } } fn run(test_environment: impl SupportedArchitectureTest) { info!("building C binaries"); - if !test_environment.build_c_file() { - std::process::exit(2); - } + test_environment.generate_c_file(); + info!("building Rust binaries"); - if !test_environment.build_rust_file() { - std::process::exit(3); - } - info!("Running binaries"); - if !test_environment.compare_outputs() { - std::process::exit(1); - } + test_environment.generate_rust_file(); } diff --git a/library/stdarch/crates/intrinsic-test/src/x86/compile.rs b/library/stdarch/crates/intrinsic-test/src/x86/compile.rs deleted file mode 100644 index 65cd291b1b377..0000000000000 --- a/library/stdarch/crates/intrinsic-test/src/x86/compile.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::common::cli::ProcessedCli; -use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; - -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { - let cpp_compiler = config.cpp_compiler.as_ref()?; - - // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations - let mut command = CompilationCommandBuilder::new() - .add_arch_flags(["icelake-client"]) - .set_compiler(cpp_compiler) - .set_target(&config.target) - .set_opt_level("2") - .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) - .set_project_root("c_programs") - .add_extra_flags(vec![ - "-ffp-contract=off", - "-Wno-narrowing", - "-mavx", - "-mavx2", - "-mavx512f", - "-msse2", - "-mavx512vl", - "-mavx512bw", - "-mavx512dq", - "-mavx512cd", - "-mavx512fp16", - "-msha512", - "-msm3", - "-msm4", - "-mavxvnni", - "-mavxvnniint8", - "-mavxneconvert", - "-mavxifma", - "-mavxvnniint16", - "-mavx512bf16", - "-mavx512bitalg", - "-mavx512ifma", - "-mavx512vbmi", - "-mavx512vbmi2", - "-mavx512vnni", - "-mavx512vpopcntdq", - "-mavx512vp2intersect", - "-mbmi", - "-mbmi2", - "-mgfni", - "-mvaes", - "-mvpclmulqdq", - "-ferror-limit=1000", - "-std=c++23", - ]); - - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } - - let cpp_compiler = command.into_cpp_compilation(); - - Some(cpp_compiler) -} diff --git a/library/stdarch/crates/intrinsic-test/src/x86/config.rs b/library/stdarch/crates/intrinsic-test/src/x86/config.rs index 491dbb5147bbc..68737ab5ac4c8 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/config.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/config.rs @@ -3,7 +3,6 @@ pub const NOTICE: &str = "\ // test are derived from an XML specification, published under the same license as the // `intrinsic-test` crate.\n"; -// Format f16 values (and vectors containing them) in a way that is consistent with C. pub const PLATFORM_RUST_DEFINITIONS: &str = r#" use core_arch::arch::x86_64::*; @@ -129,206 +128,11 @@ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) } -#[inline] -fn debug_simd_finish( - formatter: &mut core::fmt::Formatter<'_>, - type_name: &str, - array: &[T; N], -) -> core::fmt::Result { - core::fmt::Formatter::debug_tuple_fields_finish( - formatter, - type_name, - &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]), - ) -} - -trait DebugAs { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result; -} - -impl DebugAs for T { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{self}") - } -} - -macro_rules! impl_debug_as { - ($simd:ty, $name:expr, $bits:expr, [$($type:ty),+]) => { - $( - impl DebugAs<$type> for $simd { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - const ELEMENT_BITS: usize = core::mem::size_of::<$type>() * 8; - const NUM_ELEMENTS: usize = $bits / ELEMENT_BITS; - let array = unsafe { core::mem::transmute::<_, [$type; NUM_ELEMENTS]>(*self) }; - debug_simd_finish(f, $name, &array) - } - } - )+ - }; -} - -impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64, f16]); -impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]); -impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]); -impl_debug_as!(__m128h, "__m128h", 128, [f32]); -impl_debug_as!(__m256h, "__m256h", 256, [f32]); -impl_debug_as!(__m512h, "__m512h", 512, [f32]); - -fn debug_as(x: V) -> impl core::fmt::Debug -where V: DebugAs -{ - struct DebugWrapper(V, core::marker::PhantomData); - impl, T> core::fmt::Debug for DebugWrapper { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.0.fmt(f) - } - } - DebugWrapper(x, core::marker::PhantomData) -} - -"#; - -pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#" -#ifndef X86_DECLARATIONS -#define X86_DECLARATIONS - typedef _Float16 float16_t; - typedef float float32_t; - typedef double float64_t; - - #define __int64 long long - #define __int32 int - - std::ostream& operator<<(std::ostream& os, _Float16 value); - std::ostream& operator<<(std::ostream& os, __m128i value); - std::ostream& operator<<(std::ostream& os, __m256i value); - std::ostream& operator<<(std::ostream& os, __m512i value); - std::ostream& operator<<(std::ostream& os, __mmask8 value); - - #define _mm512_extract_intrinsic_test_epi8(m, lane) \ - _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16) - - #define _mm512_extract_intrinsic_test_epi16(m, lane) \ - _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8) - - #define _mm512_extract_intrinsic_test_epi32(m, lane) \ - _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4) - - #define _mm512_extract_intrinsic_test_epi64(m, lane) \ - _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2) - - // Load f16 (__m128h) and cast to integer (__m128i) - #define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr)) - #define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) - #define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) - - // Load f32 (__m128) and cast to f16 (__m128h) - #define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr)) - #define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) - #define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) - - // Load integer types and cast to double (__m128d, __m256d, __m512d) - #define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - // Load integer types and cast to float (__m128, __m256, __m512) - #define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - #define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr))) - #define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr))) - #define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr))) - - // T1 is the `To` type, T2 is the `From` type - template T1 cast(T2 x) { - if constexpr ((std::is_integral_v && std::is_integral_v) || (std::is_floating_point_v && std::is_floating_point_v)) { - return x; - } else if constexpr (sizeof(T1) <= sizeof(T2)) { - T1 ret{}; - std::memcpy(&ret, &x, sizeof(T1)); - return ret; - } else { - static_assert(sizeof(T1) == sizeof(T2) || std::is_convertible_v, - "T2 must either be convertible to T1, or have the same size as T1!"); - return T1{}; - } - } -#endif -"#; -pub const PLATFORM_C_DEFINITIONS: &str = r#" - -std::ostream& operator<<(std::ostream& os, _Float16 value) { - os << static_cast(value); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m128i value) { - void* temp = malloc(sizeof(__m128i)); - _mm_storeu_si128((__m128i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 16; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m256i value) { - void* temp = malloc(sizeof(__m256i)); - _mm256_storeu_si256((__m256i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 32; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __m512i value) { - void* temp = malloc(sizeof(__m512i)); - _mm512_storeu_si512((__m512i*)temp, value); - std::stringstream ss; - - ss << "0x"; - for(int i = 0; i < 64; i++) { - ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i]; - } - os << ss.str(); - return os; -} - -std::ostream& operator<<(std::ostream& os, __mmask8 value) { - os << static_cast(value); - return os; -} "#; pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "x86", feature(avx))] -#![cfg_attr(target_arch = "x86", feature(sse))] -#![cfg_attr(target_arch = "x86", feature(sse2))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] -#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))] -#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))] -#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))] -#![feature(fmt_helpers_for_derive)] +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] "#; diff --git a/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs b/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs index 72f5da3b3faf3..608ffdd1eed96 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs @@ -1,7 +1,10 @@ use crate::common::constraint::Constraint; -pub fn map_constraints(imm_type: &String, imm_width: u32) -> Option { +pub fn map_constraints(fn_name: &str, imm_type: &String, imm_width: u32) -> Option { if imm_width > 0 { + if fn_name == "_mm_sm3rnds2_epi32" { + return Some(Constraint::Set((0..64).step_by(2).collect())); + } let max: i64 = 2i64.pow(imm_width); return Some(Constraint::Range(0..max)); } diff --git a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs index f2baf070714c1..5d4798482a1d3 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -1,4 +1,3 @@ -mod compile; mod config; mod constraint; mod intrinsic; @@ -7,7 +6,6 @@ mod xml_parser; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; -use crate::common::compile_c::CppCompilation; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::X86IntrinsicType; @@ -15,33 +13,59 @@ use xml_parser::get_xml_intrinsics; pub struct X86ArchitectureTest { intrinsics: Vec>, - cli_options: ProcessedCli, } impl SupportedArchitectureTest for X86ArchitectureTest { type IntrinsicImpl = X86IntrinsicType; - fn cli_options(&self) -> &ProcessedCli { - &self.cli_options - } - fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } - fn cpp_compilation(&self) -> Option { - compile::build_cpp_compilation(&self.cli_options) - } - const NOTICE: &str = config::NOTICE; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"]; - const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS; - const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS; + const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; + fn arch_flags(&self) -> Vec<&str> { + vec![ + "-mavx", + "-mavx2", + "-mavx512f", + "-msse2", + "-mavx512vl", + "-mavx512bw", + "-mavx512dq", + "-mavx512cd", + "-mavx512fp16", + "-msha", + "-msha512", + "-msm3", + "-msm4", + "-mavxvnni", + "-mavxvnniint8", + "-mavxneconvert", + "-mavxifma", + "-mavxvnniint16", + "-mavx512bf16", + "-mavx512bitalg", + "-mavx512ifma", + "-mavx512vbmi", + "-mavx512vbmi2", + "-mavx512vnni", + "-mavx512vpopcntdq", + "-mavx512vp2intersect", + "-mbmi", + "-mbmi2", + "-mgfni", + "-mvaes", + "-mvpclmulqdq", + "-mlzcnt", + ] + } + fn create(cli_options: ProcessedCli) -> Self { let mut intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file"); @@ -67,9 +91,6 @@ impl SupportedArchitectureTest for X86ArchitectureTest { .take(sample_size) .collect::>(); - Self { - intrinsics: intrinsics, - cli_options: cli_options, - } + Self { intrinsics } } } diff --git a/library/stdarch/crates/intrinsic-test/src/x86/types.rs b/library/stdarch/crates/intrinsic-test/src/x86/types.rs index 2391ee9c2df49..c6ea15e150752 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -1,11 +1,8 @@ use std::str::FromStr; use itertools::Itertools; -use regex::Regex; use super::intrinsic::X86IntrinsicType; -use crate::common::cli::Language; -use crate::common::indentation::Indentation; use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; use crate::x86::xml_parser::Parameter; @@ -26,82 +23,32 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { .replace("const ", "") } - fn c_single_vector_type(&self) -> String { - // matches __m128, __m256 and similar types - let re = Regex::new(r"__m\d+").unwrap(); - if re.is_match(self.param.type_data.as_str()) { - self.param.type_data.clone() - } else { - unreachable!("Shouldn't be called on this type") + fn rust_type(&self) -> String { + let type_data = &*self.param.type_data; + if type_data.starts_with("__m") { + return type_data.to_owned(); + } + match &*type_data.replace("const ", "") { + "_Float16" => "f16", + "__bfloat16" => "bf16", + "float" => "f32", + "double" => "f64", + "__int8" | "char" => "i8", + "unsigned char" => "u8", + "__int16" | "short" => "i16", + "unsigned short" => "u16", + "__int32" | "int" => "i32", + "unsigned __int32" | "unsigned int" | "unsigned long" => "u32", + "__int64" | "long long" => "i64", + "unsigned __int64" => "u64", + "size_t" => "usize", + _ => todo!("unknown type {type_data}"), } + .to_string() } - // fn rust_type(&self) -> String { - // // handling edge cases first - // // the general handling is implemented below - // if let Some(val) = self.metadata.get("type") { - // match val.as_str() { - // "__m128 const *" => { - // return "&__m128".to_string(); - // } - // "__m128d const *" => { - // return "&__m128d".to_string(); - // } - // "const void*" => { - // return "&__m128d".to_string(); - // } - // _ => {} - // } - // } - - // if self.kind() == TypeKind::Void && self.ptr { - // // this has been handled by default settings in - // // the from_param function of X86IntrinsicType - // unreachable!() - // } - - // // general handling cases - // let core_part = if self.kind() == TypeKind::Mask { - // // all types of __mmask are handled here - // format!("__mask{}", self.bit_len.unwrap()) - // } else if self.simd_len.is_some() { - // // all types of __m vector types are handled here - // let re = Regex::new(r"\__m\d+[a-z]*").unwrap(); - // let rust_type = self - // .metadata - // .get("type") - // .map(|val| re.find(val).unwrap().as_str()); - // rust_type.unwrap().to_string() - // } else { - // format!( - // "{}{}", - // self.kind.rust_prefix().to_string(), - // self.bit_len.unwrap() - // ) - // }; - - // // extracting "memsize" so that even vector types can be involved - // let memwidth = self - // .metadata - // .get("memwidth") - // .map(|n| str::parse::(n).unwrap()); - // let prefix_part = if self.ptr && self.constant && self.bit_len.eq(&memwidth) { - // "&" - // } else if self.ptr && self.bit_len.eq(&memwidth) { - // "&mut " - // } else if self.ptr && self.constant { - // "*const " - // } else if self.ptr { - // "*mut " - // } else { - // "" - // }; - - // return prefix_part.to_string() + core_part.as_str(); - // } - /// Determines the load function for this type. - fn get_load_function(&self, _language: Language) -> String { + fn get_load_function(&self) -> String { let type_value = self.param.type_data.clone(); if type_value.len() == 0 { unimplemented!("the value for key 'type' is not present!"); @@ -168,112 +115,16 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { } } - /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. The generated line assumes - /// there is an int i in scope which is the current pass number. - fn print_result_c(&self, indentation: Indentation, additional: &str) -> String { - let lanes = if self.num_lanes() > 1 { - (0..self.num_lanes()) - .map(|idx| -> std::string::String { - let cast_type = self.c_promotion(); - let lane_fn = self.get_lane_function(); - if cast_type.len() > 2 { - format!("cast<{cast_type}>({lane_fn}(__return_value, {idx}))") - } else { - format!("{lane_fn}(__return_value, {idx})") - } - }) - .collect::>() - .join(r#" << ", " << "#) - } else { + fn rust_scalar_type(&self) -> String { + if self.is_simd() { format!( - "{promote}cast<{cast}>(__return_value)", - cast = match self.kind() { - TypeKind::Void => "void".to_string(), - TypeKind::Float if self.inner_size() == 64 => "double".to_string(), - TypeKind::Float if self.inner_size() == 32 => "float".to_string(), - TypeKind::Mask => format!( - "__mmask{}", - self.bit_len.expect(format!("self: {self:#?}").as_str()) - ), - TypeKind::Vector => format!( - "__m{}i", - self.bit_len.expect(format!("self: {self:#?}").as_str()) - ), - _ => self.c_scalar_type(), - }, - promote = self.generate_final_type_cast(), + "{prefix}{bits}", + prefix = self.kind().rust_prefix(), + bits = self.inner_size() ) - }; - - format!( - r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, - ty = if self.is_simd() { - format!("{}(", self.c_type()) - } else { - String::from("") - }, - close = if self.is_simd() { ")" } else { "" }, - ) - } - - /// Determines the get lane function for this type. - fn get_lane_function(&self) -> String { - let total_vector_bits: Option = self - .simd_len - .zip(self.bit_len) - .and_then(|(simd_len, bit_len)| Some(simd_len * bit_len)); - - match (self.bit_len, total_vector_bits) { - (Some(8), Some(128)) => String::from("(uint8_t)_mm_extract_epi8"), - (Some(16), Some(128)) => String::from("(uint16_t)_mm_extract_epi16"), - (Some(32), Some(128)) => String::from("(uint32_t)_mm_extract_epi32"), - (Some(64), Some(128)) => String::from("(uint64_t)_mm_extract_epi64"), - (Some(8), Some(256)) => String::from("(uint8_t)_mm256_extract_epi8"), - (Some(16), Some(256)) => String::from("(uint16_t)_mm256_extract_epi16"), - (Some(32), Some(256)) => String::from("(uint32_t)_mm256_extract_epi32"), - (Some(64), Some(256)) => String::from("(uint64_t)_mm256_extract_epi64"), - (Some(8), Some(512)) => String::from("(uint8_t)_mm512_extract_intrinsic_test_epi8"), - (Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"), - (Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"), - (Some(64), Some(512)) => String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"), - _ => unreachable!( - "invalid length for vector argument: {:?}, {:?}", - self.bit_len, self.simd_len - ), - } - } - - fn rust_scalar_type(&self) -> String { - let prefix = match self.data.kind { - TypeKind::Mask => String::from("__mmask"), - TypeKind::Vector => String::from("i"), - _ => self.kind().rust_prefix().to_string(), - }; - - let bits = if self.inner_size() >= 128 { - 32 } else { - self.inner_size() - }; - format!("{prefix}{bits}") - } - - fn print_result_rust(&self) -> String { - let return_value = match self.kind() { - // `_mm{256}_cvtps_ph` has return type __m128i but contains f16 values - TypeKind::Float if self.param.type_data == "__m128i" => { - "format_args!(\"{:.150?}\", debug_as::<_, f16>(__return_value))".to_string() - } - TypeKind::Int(_) - if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) => - { - format!("debug_as::<_, u{}>(__return_value)", self.inner_size()) - } - _ => "format_args!(\"{__return_value:.150?}\")".to_string(), - }; - - return_value + self.rust_type().replace("__mmask", "u") + } } } diff --git a/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs index 681b1a3c52743..6006d7919f875 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs @@ -99,7 +99,7 @@ fn xml_to_intrinsic( } else { param.imm_width }; - let constraint = map_constraints(¶m.imm_type, effective_imm_width); + let constraint = map_constraints(&name, ¶m.imm_type, effective_imm_width); let arg = Argument::::new( i, param.var_name.clone(), diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index e37f9f124be4c..03fea5e0a4af3 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -78,14 +78,22 @@ cfg-target-has-atomic-64: &cfg-target-has-atomic-64 neon-unstable-fp8: &neon-unstable-fp8 FnCall: [unstable, ['feature = "stdarch_neon_fp8"', 'issue = "none"']] +# all(test, target_endian = "little") +all-test-little-endian: &all-test-little-endian + FnCall: [all, [test, 'target_endian = "little"']] + # #[cfg(target_endian = "little")] -little-endian: &little-endian +cfg-little-endian: &cfg-little-endian FnCall: [cfg, ['target_endian = "little"']] # #[cfg(target_endian = "big")] -big-endian: &big-endian +cfg-big-endian: &cfg-big-endian FnCall: [cfg, ['target_endian = "big"']] +# all(test, not(target_env = "msvc"), target_endian = "big") +cfg-test-not-msvc-little-endian: &cfg-test-not-msvc-little-endian + FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}, 'target_endian = "little"']] + intrinsics: - name: "vaddd_{type}" doc: Add @@ -174,12 +182,12 @@ intrinsics: - ['d_f64', 'f64'] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vabd_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - 0 + - - 0 - name: "vabd{type[0]}" doc: "Floating-point absolute difference" @@ -195,125 +203,28 @@ intrinsics: - ['h_f16', 'f16'] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vabd_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - 0 - - - name: "vabdl_high{neon_type[0].noq}" - doc: Signed Absolute difference Long - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[1]}" - attr: [*neon-stable] - assert_instr: [sabdl2] - safety: safe - types: - - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t] - compose: - - Let: - - c - - "{neon_type[2]}" - - FnCall: - - simd_shuffle! - - - a - - a - - [8, 9, 10, 11, 12, 13, 14, 15] - - Let: - - d - - "{neon_type[2]}" - - FnCall: - - simd_shuffle! - - - b - - b - - [8, 9, 10, 11, 12, 13, 14, 15] - - Let: - - e - - "{neon_type[3]}" - - FnCall: - - simd_cast - - - FnCall: - - "vabd_{neon_type[0]}" - - - c - - d - - FnCall: - - simd_cast - - - e + - - 0 - name: "vabdl_high{neon_type[0].noq}" doc: Signed Absolute difference Long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: - - stable - - - 'feature = "neon_intrinsics"' - - 'since = "1.59.0"' - assert_instr: [sabdl2] + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]] safety: safe types: + - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t] - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t] - compose: - - Let: - - c - - "{neon_type[2]}" - - FnCall: - - simd_shuffle! - - - a - - a - - [4, 5, 6, 7] - - Let: - - d - - "{neon_type[2]}" - - FnCall: - - simd_shuffle! - - - b - - b - - [4, 5, 6, 7] - - Let: - - e - - "{neon_type[3]}" - - FnCall: - - simd_cast - - - FnCall: - - "vabd_{neon_type[0]}" - - - c - - d - - FnCall: - - simd_cast - - - e - - - name: "vabdl_high{neon_type[0].noq}" - doc: Signed Absolute difference Long - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[1]}" - attr: - - FnCall: - - stable - - - 'feature = "neon_intrinsics"' - - 'since = "1.59.0"' - assert_instr: [sabdl2] - safety: safe - types: - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t] compose: - - Let: - - c - - "{neon_type[2]}" - - FnCall: - - simd_shuffle! - - - a - - a - - [2, 3] - - Let: - - d - - "{neon_type[2]}" - - FnCall: - - simd_shuffle! - - - b - - b - - [2, 3] + - Let: [c, FnCall: ['vget_high_{neon_type[0]}', [a]]] + - Let: [d, FnCall: ['vget_high_{neon_type[0]}', [b]]] - Let: - e - "{neon_type[3]}" @@ -333,7 +244,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [uint64x1_t, uint64x1_t] @@ -351,7 +262,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -365,19 +276,19 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vceq_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - 0 - name: "vceq{type[0]}" @@ -394,12 +305,12 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vceq_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - 0 - name: "vceqd_{type[0]}" doc: "Compare bitwise equal" @@ -407,7 +318,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64", "s64"] @@ -426,7 +337,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)'] @@ -444,7 +355,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tst]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64", "s64"] @@ -463,7 +374,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_s32", "i32", "u32"] @@ -481,19 +392,19 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["b_s8", "i8", "u8", "s8"] - ["h_s16", "i16", "u16", "s16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[3]}' - - FnCall: - "vuqadd_{type[3]}" - - FnCall: ["vdup_n_{type[3]}", [a]] - FnCall: ["vdup_n_{type[2]}", [b]] - - '0' + - - '0' - name: "vabs{neon_type.no}" doc: "Floating-point absolute value" @@ -501,7 +412,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fabs]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float64x1_t @@ -515,7 +426,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int64x1_t, uint64x1_t] @@ -529,7 +440,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - uint64x1_t @@ -543,7 +454,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -557,19 +468,19 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - 'simd_extract!' + - 'vget_lane_{type[2]}' - - FnCall: - "vcgt_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - '0' - name: "vcgt{type[0]}" @@ -586,12 +497,12 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - 'simd_extract!' + - 'vget_lane_{type[2]}' - - FnCall: - "vcgt_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - '0' - name: "vclt{neon_type[0].no}" doc: "Compare signed less than" @@ -599,7 +510,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int64x1_t, uint64x1_t] @@ -613,7 +524,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int64x1_t, uint64x1_t] @@ -627,7 +538,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -641,19 +552,19 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcle_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - '0' - name: "vcle{type[0]}" @@ -670,12 +581,12 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcle_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - '0' - name: "vcge{neon_type[0].no}" doc: "Compare signed greater than or equal" @@ -683,7 +594,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int64x1_t, uint64x1_t] @@ -697,7 +608,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] @@ -718,7 +629,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64"] @@ -735,7 +646,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmle]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] @@ -759,7 +670,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmle]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] @@ -779,18 +690,18 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vclez_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vclez{type[0]}" doc: "Floating-point compare less than or equal to zero" @@ -806,11 +717,11 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vclez_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vcltz{neon_type[0].no}" doc: "Compare signed less than zero" @@ -818,7 +729,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmlt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] @@ -842,7 +753,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmlt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] @@ -862,18 +773,18 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcltz_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vcltz{type[0]}" doc: "Floating-point compare less than zero" @@ -889,11 +800,11 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcltz_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vcltzd_s64" doc: "Compare less than zero" @@ -901,7 +812,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [asr]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64"] @@ -918,7 +829,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -936,7 +847,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32", i32] @@ -975,7 +886,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -993,7 +904,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32", i32] @@ -1033,7 +944,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -1047,7 +958,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] @@ -1076,7 +987,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -1090,7 +1001,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] @@ -1119,7 +1030,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int64x1_t, float64x1_t] @@ -1133,7 +1044,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "i32", "f32", s32] @@ -1147,7 +1058,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [uint64x1_t, float64x1_t] @@ -1161,7 +1072,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u32", "f32", "s_f32"] @@ -1176,7 +1087,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1314,7 +1225,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1340,7 +1251,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1365,7 +1276,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1389,7 +1300,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "i32", "s_s32_f32", "32"] @@ -1403,7 +1314,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", "s_u32_f32"] @@ -1488,7 +1399,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, float64x2_t] @@ -1500,28 +1411,20 @@ intrinsics: arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]] + - *neon-stable safety: safe types: - [float32x4_t, float64x2_t] compose: - - Let: - - b - - float32x2_t - - FnCall: - - simd_shuffle! - - - a - - a - - '[2, 3]' - - FnCall: [simd_cast, [b]] + - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]] - name: "vcvt_high_f16_f32" doc: "Floating-point convert to lower precision" arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]] - *neon-stable-fp16 - *target-not-arm64ec safety: safe @@ -1538,7 +1441,7 @@ intrinsics: arguments: ["a: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]] - *neon-stable-fp16 - *target-not-arm64ec safety: safe @@ -1555,8 +1458,8 @@ intrinsics: arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn]]}]] + - *neon-stable safety: safe types: - [float64x2_t, float32x2_t] @@ -1568,25 +1471,24 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]] + - *neon-stable safety: safe types: - [float32x2_t, float64x2_t, float32x4_t] compose: - FnCall: - - simd_shuffle! + - vcombine_f32 - - a - - FnCall: [simd_cast, [b]] - - '[0, 1, 2, 3]' + - FnCall: [vcvt_f32_f64, [b]] - name: "vcvtx_f32_f64" doc: "Floating-point convert to lower precision narrow, rounding to odd" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn]]}]] + - *neon-stable safety: safe types: - [float64x2_t, float32x2_t] @@ -1603,34 +1505,33 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f64", "f32"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - vcvtx_f32_f64 - - FnCall: [vdupq_n_f64, [a]] - - '0' + - - '0' - name: "vcvtx_high_f32_f64" doc: "Floating-point convert to lower precision narrow, rounding to odd" arguments: ["a: {type[0]}", "b: {neon_type[1]}"] return_type: "{type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn2]]}]] + - *neon-stable safety: safe types: - [float32x2_t, float64x2_t, float32x4_t] compose: - FnCall: - - simd_shuffle! + - vcombine_f32 - - a - FnCall: [vcvtx_f32_f64, [b]] - - '[0, 1, 2, 3]' - name: "vcvt{type[2]}" doc: "Floating-point convert to fixed-point, rounding toward zero" @@ -1639,7 +1540,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1662,7 +1563,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1756,7 +1657,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1779,7 +1680,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -1801,7 +1702,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, int32x2_t, _s32_f32] @@ -1842,7 +1743,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "i32", 's_s32_f32'] @@ -1866,9 +1767,9 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h_u16_f16'] - ["f16", "u32", 'h_u32_f16'] - ["f16", "u64", 'h_u64_f16'] - compose: - LLVMLink: name: "vcvta{type[2]}" @@ -1888,6 +1789,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h_s16_f16'] - ["f16", "i32", 'h_s32_f16'] - ["f16", "i64", 'h_s64_f16'] compose: @@ -1898,44 +1800,13 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - - name: "vcvta{type[2]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to away" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h_s16_f16', 's32'] - compose: - - 'vcvtah_{type[3]}_f16(a) as i16' - - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h_u16_f16', 'u32'] - compose: - - 'vcvtah_{type[3]}_f16(a) as u16' - - - name: "vcvta{type[2]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to away" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", 's_u32_f32'] @@ -1953,7 +1824,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, int32x2_t] @@ -1973,7 +1844,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "i32", 's_s32_f32'] @@ -2038,6 +1909,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -2048,22 +1920,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtnh_{type[3]}_f16(a) as i16' - - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" arguments: ["a: {type[0]}"] @@ -2075,6 +1931,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -2085,28 +1942,13 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtnh_{type[3]}_f16(a) as u16' - - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to signed integer, rounding toward minus infinity" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, int32x2_t] @@ -2169,7 +2011,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "i32", 's_s32_f32'] @@ -2187,7 +2029,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, int32x2_t] @@ -2207,7 +2049,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "i32", 's_s32_f32'] @@ -2225,7 +2067,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t] @@ -2245,7 +2087,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", 's_u32_f32'] @@ -2263,7 +2105,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t] @@ -2283,7 +2125,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", s_u32_f32] @@ -2301,7 +2143,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t] @@ -2321,7 +2163,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", s_u32_f32, 'i32'] @@ -2390,6 +2232,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -2400,21 +2243,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding to plus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtph_{type[3]}_f16(a) as i16' - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding to plus infinity" arguments: ["a: {type[0]}"] @@ -2426,6 +2254,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -2436,21 +2265,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to plus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtph_{type[3]}_f16(a) as u16' - - name: "vdup{neon_type.laneq_nox}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type}"] @@ -2458,8 +2272,9 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] + big_endian_inverse: true safety: safe types: - poly64x2_t @@ -2475,8 +2290,9 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] + big_endian_inverse: true safety: safe types: - [poly64x1_t, poly64x2_t] @@ -2492,7 +2308,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2509,7 +2325,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2518,7 +2334,7 @@ intrinsics: - [float64x1_t, "f64"] compose: - FnCall: [static_assert!, ['N == 0']] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup_laneq_{neon_type[0]}" doc: "Set all vector lanes to the same value" @@ -2527,7 +2343,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2536,8 +2352,8 @@ intrinsics: compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - FnCall: - - "transmute::<{type[2]}, _>" - - - FnCall: [simd_extract!, [a, 'N as u32']] + - transmute + - - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup{type[2]}" doc: "Set all vector lanes to the same value" @@ -2546,7 +2362,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2558,7 +2374,7 @@ intrinsics: - [float64x2_t, "f64", d_laneq_f64] compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup{type[2]}" doc: "Set all vector lanes to the same value" @@ -2567,7 +2383,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2579,7 +2395,7 @@ intrinsics: - [poly16x8_t, "p16", h_laneq_p16] compose: - FnCall: [static_assert_uimm_bits!, [N, 3]] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup{type[2]}" @@ -2598,7 +2414,7 @@ intrinsics: - [float16x4_t, "f16", h_lane_f16] compose: - FnCall: [static_assert_uimm_bits!, [N, 2]] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup{type[2]}" @@ -2617,7 +2433,7 @@ intrinsics: - [float16x8_t, "f16", h_laneq_f16] compose: - FnCall: [static_assert_uimm_bits!, [N, 4]] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup{type[2]}" @@ -2627,7 +2443,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2636,7 +2452,7 @@ intrinsics: - [poly8x16_t, "p8", b_laneq_p8] compose: - FnCall: [static_assert_uimm_bits!, [N, 4]] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - name: "vdup{type[2]}" doc: "Set all vector lanes to the same value" @@ -2645,7 +2461,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -2657,24 +2473,25 @@ intrinsics: - [float32x4_t, "f32", s_laneq_f32] compose: - FnCall: [static_assert_uimm_bits!, [N, 2]] - - FnCall: [simd_extract!, [a, 'N as u32']] + - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] + big_endian_inverse: true safety: safe types: - - [poly64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] - - [float64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] + - poly64x2_t + - float64x2_t compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']] - name: "vmla{neon_type.no}" doc: "Floating-point multiply-add to accumulator" @@ -2682,7 +2499,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float64x1_t @@ -2695,16 +2512,16 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]] + - *neon-stable safety: safe types: - - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]'] - - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]', '[2, 3]'] + - [int16x8_t, int8x16_t, int8x8_t] + - [int32x4_t, int16x8_t, int16x4_t] + - [int64x2_t, int32x4_t, int32x2_t] compose: - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] - - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]}] + - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}] + - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}] - FnCall: ["vmlal_{neon_type[2]}", [a, b, c]] - name: "vmlal_high_{neon_type[1]}" @@ -2712,22 +2529,16 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]] + - *neon-stable safety: safe types: - - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + - [uint16x8_t, uint8x16_t, uint8x8_t] + - [uint32x4_t, uint16x8_t, uint16x4_t] + - [uint64x2_t, uint32x4_t, uint32x2_t] compose: - - Let: - - b - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] - - Let: - - c - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}] + - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}] - FnCall: ["vmlal_{neon_type[1]}", [a, b, c]] - name: "vmlsl_high_{neon_type[1]}" @@ -2735,22 +2546,16 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]] + - *neon-stable safety: safe types: - - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]'] - - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]'] + - [int16x8_t, int8x16_t, int8x8_t] + - [int32x4_t, int16x8_t, int16x4_t] + - [int64x2_t, int32x4_t, int32x2_t] compose: - - Let: - - b - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] - - Let: - - c - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}] + - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}] - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]] - name: "vmlsl_high_{neon_type[1]}" @@ -2758,44 +2563,38 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]] + - *neon-stable safety: safe types: - - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + - [uint16x8_t, uint8x16_t, uint8x8_t] + - [uint32x4_t, uint16x8_t, uint16x4_t] + - [uint64x2_t, uint32x4_t, uint32x2_t] compose: - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] - - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] + - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}] + - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}] - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]] - name: "vmovn_high{neon_type[1].noq}" doc: Extract narrow arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" - attr: [*neon-stable] - assert_instr: [xtn2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [xtn2]]}]] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t] + - [int16x4_t, int32x4_t, int16x8_t] + - [int32x2_t, int64x2_t, int32x4_t] + - [uint8x8_t, uint16x8_t, uint8x16_t] + - [uint16x4_t, uint32x4_t, uint16x8_t] + - [uint32x2_t, uint64x2_t, uint32x4_t] compose: - - Let: - - c - - "{neon_type[0]}" - - FnCall: - - simd_cast - - - b - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - - c - - "{type[3]}" + - FnCall: ['simd_cast', [b]] - name: "vneg{neon_type.no}" doc: Negate @@ -2873,11 +2672,11 @@ intrinsics: - [i64, 'd_s64', 's64'] compose: - FnCall: - - 'simd_extract!' + - 'vget_lane_{type[2]}' - - FnCall: - 'vqneg_{type[2]}' - - FnCall: ['vdup_n_{type[2]}', [a]] - - 0 + - - 0 - name: "vqneg{neon_type[0].no}" doc: Signed saturating negate @@ -2954,12 +2753,12 @@ intrinsics: - "vdup_n_{type[2]}" - - b - FnCall: - - 'simd_extract!' + - 'vget_lane_{type[2]}' - - FnCall: - "vqsub_{type[2]}" - - a - b - - "0" + - - "0" - name: "vqsub{type[3]}" doc: Saturating subtract @@ -2985,12 +2784,12 @@ intrinsics: - "vdup_n_{type[2]}" - - b - FnCall: - - 'simd_extract!' + - 'vget_lane_{type[2]}' - - FnCall: - "vqsub_{type[2]}" - - a - b - - "0" + - - "0" - name: "vrbit{neon_type.no}" doc: Reverse bit order @@ -3439,12 +3238,12 @@ intrinsics: - "vdup_n_{type[0]}" - - b - FnCall: - - simd_extract! + - 'vget_lane_{type[0]}' - - FnCall: - "vqadd_{type[0]}" - - a - b - - "0" + - - "0" - name: "vqadd{type[2]}" doc: Saturating add @@ -3470,12 +3269,12 @@ intrinsics: - "vdup_n_{type[0]}" - - b - FnCall: - - simd_extract! + - 'vget_lane_{type[0]}' - - FnCall: - "vqadd_{type[0]}" - - a - b - - "0" + - - "0" - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers" @@ -3712,7 +3511,6 @@ intrinsics: return_type: "{neon_type[1]}" attr: [*neon-stable] assert_instr: [ld2] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4059,7 +3857,6 @@ intrinsics: arguments: ["a: {type[0]}"] return_type: "{neon_type[1]}" attr: [*neon-stable] - big_endian_inverse: false safety: unsafe: [neon] assert_instr: [ld3] @@ -4198,7 +3995,6 @@ intrinsics: return_type: "{neon_type[1]}" attr: [*neon-stable] assert_instr: [ld4] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4306,7 +4102,6 @@ intrinsics: - *neon-stable static_defs: - "const LANE: i32" - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4356,7 +4151,6 @@ intrinsics: - *neon-stable static_defs: - "const LANE: i32" - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4480,7 +4274,7 @@ intrinsics: - Let: - "lane" - i64 - - FnCall: [simd_extract!, [val, 'LANE as u32']] + - FnCall: ['vget{neon_type[1].lane_nox}', [val], [LANE]] - MethodCall: - "(*atomic_dst)" - store @@ -5152,45 +4946,35 @@ intrinsics: - name: "vmull_high{neon_type[0].noq}" doc: Signed multiply long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[3]}" - attr: [*neon-stable] - assert_instr: [smull2] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]] safety: safe types: - - [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t] - - [int16x8_t, int16x4_t, '[4, 5, 6, 7]', int32x4_t] - - [int32x4_t, int32x2_t, '[2, 3]', int64x2_t] + - [int8x16_t, int16x8_t] + - [int16x8_t, int32x4_t] + - [int32x4_t, int64x2_t] compose: - - Let: - - a - - "{neon_type[1]}" - - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] - - Let: - - b - - "{neon_type[1]}" - - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}] - FnCall: ["vmull_{neon_type[0]}", [a, b]] - name: "vmull_high{neon_type[0].noq}" doc: "Unsigned multiply long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[3]}" - attr: [*neon-stable] - assert_instr: [umull2] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]] safety: safe types: - - [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t] - - [uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', uint32x4_t] - - [uint32x4_t, uint32x2_t, '[2, 3]', uint64x2_t] + - [uint8x16_t, uint16x8_t] + - [uint16x8_t, uint32x4_t] + - [uint32x4_t, uint64x2_t] compose: - - Let: - - a - - "{neon_type[1]}" - - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] - - Let: - - b - - "{neon_type[1]}" - - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}] - FnCall: ["vmull_{neon_type[0]}", [a, b]] - name: "vmull_p64" @@ -5216,22 +5000,16 @@ intrinsics: - name: "vmull_high{neon_type[0].noq}" doc: "Polynomial multiply long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[3]}" + return_type: "{neon_type[1]}" attr: - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]] safety: safe - assert_instr: [pmull2] types: - - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t] + - [poly8x16_t, poly16x8_t] compose: - - Let: - - a - - "{neon_type[1]}" - - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] - - Let: - - b - - "{neon_type[1]}" - - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}] - FnCall: ["vmull_{neon_type[0]}", [a, b]] - name: "vmull_high{neon_type[0].noq}" @@ -5241,15 +5019,15 @@ intrinsics: attr: - *neon-aes - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]] safety: safe - assert_instr: [pmull2] types: - [poly64x2_t, "p128"] compose: - FnCall: - "vmull_{neon_type[0]}" - - - FnCall: [simd_extract!, [a, '1']] - - FnCall: [simd_extract!, [b, '1']] + - - FnCall: ['vget{neon_type[0].lane_nox}', [a], [1]] + - FnCall: ['vget{neon_type[0].lane_nox}', [b], [1]] - name: "vmulx{neon_type.no}" doc: Floating-point multiply extended @@ -5348,11 +5126,8 @@ intrinsics: - vmulx_f64 - - a - FnCall: - - 'transmute::' - - - FnCall: - - "simd_extract!" - - - b - - 'LANE as u32' + - 'transmute' + - - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]] - name: "vmulx{type[0]}" doc: Floating-point multiply extended @@ -5371,11 +5146,7 @@ intrinsics: - FnCall: - "vmulx{type[3]}" - - a - - FnCall: - - "simd_shuffle!" - - - b - - b - - "{type[4]}" + - FnCall: ['vdup{type[0]}', [b], [LANE]] - name: "vmulx{type[0]}" doc: Floating-point multiply extended @@ -5388,16 +5159,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ["d_lane_f64", "f64", float64x1_t, "d_f64", 'LANE as u32'] + - ["d_lane_f64", "f64", float64x1_t, "d_f64"] compose: - FnCall: [static_assert!, ['LANE == 0']] - FnCall: - "vmulx{type[3]}" - - a - - FnCall: - - "simd_extract!" - - - b - - "{type[4]}" + - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]] - name: "vmulx_laneq_f64" doc: Floating-point multiply extended @@ -5417,11 +5185,8 @@ intrinsics: - vmulx_f64 - - a - FnCall: - - 'transmute::' - - - FnCall: - - "simd_extract!" - - - b - - 'LANE as u32' + - 'transmute' + - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]] - name: "vmulx{type[0]}" doc: Floating-point multiply extended @@ -5434,21 +5199,17 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32; 2]'] - - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32; 2]'] - - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32; 4]'] - - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32; 4]'] - - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32; 2]'] + - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32'] + - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32'] + - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32'] + - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32'] + - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64'] compose: - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] - FnCall: - "vmulx{type[4]}" - - a - - FnCall: - - "simd_shuffle!" - - - b - - b - - "{type[5]}" + - FnCall: ['vdup{type[0]}', [b], [LANE]] - name: "vmulx{type[0]}" @@ -5464,20 +5225,16 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32; 4]'] - - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32; 4]'] - - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32; 8]'] - - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32; 8]'] + - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16'] + - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16'] + - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16'] + - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16'] compose: - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] - FnCall: - "vmulx{type[4]}" - - a - - FnCall: - - "simd_shuffle!" - - - b - - b - - "{type[5]}" + - FnCall: ['vdup{type[0]}', [b], [LANE]] - name: "vmulx{type[0]}" @@ -5491,18 +5248,15 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['s_lane_f32', f32, float32x2_t, '1', 's_f32', 'LANE as u32'] - - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32', 'LANE as u32'] - - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64', 'LANE as u32'] + - ['s_lane_f32', f32, float32x2_t, '1', 's_f32'] + - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32'] + - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64'] compose: - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] - FnCall: - "vmulx{type[4]}" - - a - - FnCall: - - "simd_extract!" - - - b - - "{type[5]}" + - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]] - name: "vmulx{type[0]}" @@ -5525,10 +5279,7 @@ intrinsics: - FnCall: - "vmulx{type[4]}" - - a - - FnCall: - - "simd_extract!" - - - b - - "{type[5]}" + - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]] - name: "vmulx{neon_type[0].N}" @@ -5822,18 +5573,18 @@ intrinsics: doc: Signed Subtract Wide arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[0]}" - attr: [*neon-stable] - assert_instr: [ssubw2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubw2]]}]] safety: safe types: - - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]'] - - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]'] + - [int16x8_t, int8x16_t] + - [int32x4_t, int16x8_t] + - [int64x2_t, int32x4_t] compose: - Let: - c - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - FnCall: ['vget_high_{neon_type[1]}', [b]] - FnCall: - simd_sub - - a @@ -5843,18 +5594,18 @@ intrinsics: doc: Unsigned Subtract Wide arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[0]}" - attr: [*neon-stable] - assert_instr: [usubw2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubw2]]}]] safety: safe types: - - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + - [uint16x8_t, uint8x16_t] + - [uint32x4_t, uint16x8_t] + - [uint64x2_t, uint32x4_t] compose: - Let: - c - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - FnCall: ['vget_high_{neon_type[1]}', [b]] - FnCall: - simd_sub - - a @@ -5864,61 +5615,47 @@ intrinsics: doc: "Signed Subtract Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" - attr: [*neon-stable] - assert_instr: [ssubl2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubl2]]}]] safety: safe types: - - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t] - - [int16x8_t, int32x4_t, '[4, 5, 6, 7]', int16x4_t] - - [int32x4_t, int64x2_t, '[2, 3]', int32x2_t] + - [int8x16_t, int16x8_t] + - [int32x4_t, int64x2_t] + - [int16x8_t, int32x4_t] compose: - Let: - c - - "{neon_type[3]}" - - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] - - Let: - - d - "{neon_type[1]}" - - FnCall: [simd_cast, [c]] - - Let: - - e - - "{neon_type[3]}" - - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]] - Let: - - f + - d - "{neon_type[1]}" - - FnCall: [simd_cast, [e]] - - FnCall: [simd_sub, [d, f]] + - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]] + - FnCall: [simd_sub, [c, d]] - name: "vsubl_high{neon_type[0].noq}" doc: "Unsigned Subtract Long" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" - attr: [*neon-stable] - assert_instr: [usubl2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubl2]]}]] safety: safe types: - - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t] - - [uint16x8_t, uint32x4_t, '[4, 5, 6, 7]', uint16x4_t] - - [uint32x4_t, uint64x2_t, '[2, 3]', uint32x2_t] + - [uint8x16_t, uint16x8_t] + - [uint16x8_t, uint32x4_t] + - [uint32x4_t, uint64x2_t] compose: - Let: - c - - "{neon_type[3]}" - - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] - - Let: - - d - "{neon_type[1]}" - - FnCall: [simd_cast, [c]] - - Let: - - e - - "{neon_type[3]}" - - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]] - Let: - - f + - d - "{neon_type[1]}" - - FnCall: [simd_cast, [e]] - - FnCall: [simd_sub, [d, f]] + - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]] + - FnCall: [simd_sub, [c, d]] - name: "vbcax{neon_type.no}" doc: Bit clear and exclusive OR @@ -5971,6 +5708,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -5991,6 +5729,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -6013,6 +5752,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6034,6 +5774,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcadd] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6053,6 +5794,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -6075,6 +5817,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6094,6 +5837,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -6116,6 +5860,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6135,6 +5880,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -6158,6 +5904,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6180,14 +5927,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x4_t, ''] + - [float32x4_t, float32x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] - name: "vcmla{neon_type[0].laneq_nox}" @@ -6204,14 +5950,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x8_t, ''] + - [float16x8_t, float16x8_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] - name: "vcmla{neon_type[0].rot90_laneq}" @@ -6226,14 +5971,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x4_t, ''] + - [float32x4_t, float32x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - name: "vcmla{neon_type[0].rot90_laneq}" @@ -6250,14 +5994,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x8_t, ''] + - [float16x8_t, float16x8_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - name: "vcmla{neon_type[0].rot90_lane}" @@ -6272,14 +6015,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x2_t, ''] + - [float32x4_t, float32x2_t, 'q'] compose: - FnCall: [static_assert!, ['LANE == 0']] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - name: "vcmla{neon_type[0].rot90_lane}" @@ -6296,14 +6038,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x4_t, ''] + - [float16x8_t, float16x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] - name: "vcmla{neon_type.rot180}" @@ -6315,6 +6056,7 @@ intrinsics: - *neon-unstable-fcma assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float32x2_t - float32x4_t @@ -6338,6 +6080,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fcmla] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -6361,14 +6104,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x4_t, ''] + - [float32x4_t, float32x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - name: "vcmla{neon_type[0].rot180_laneq}" @@ -6385,19 +6127,16 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x8_t, - '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]' - ] + - [float16x4_t, float16x8_t, ''] + - [float16x8_t, float16x8_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - - name: "vcmla{type[3]}" + - name: "vcmla{neon_type[0].rot180_lane}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -6409,17 +6148,16 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32'] + - [float32x2_t, float32x2_t, ''] + - [float32x4_t, float32x2_t, 'q'] compose: - FnCall: [static_assert!, ['LANE == 0']] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - - name: "vcmla{type[3]}" + - name: "vcmla{neon_type[0].rot180_lane}" doc: Floating-point complex multiply accumulate arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" @@ -6433,16 +6171,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16'] - - [float16x8_t, float16x4_t, - '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16' - ] + - [float16x4_t, float16x4_t, ''] + - [float16x8_t, float16x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] - name: "vcmla{neon_type[0].rot270_laneq}" @@ -6457,14 +6192,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x4_t, ''] + - [float32x4_t, float32x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - name: "vcmla{neon_type[0].rot270_laneq}" @@ -6481,14 +6215,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x8_t, ''] + - [float16x8_t, float16x8_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - name: "vcmla{neon_type[0].lane_nox}" @@ -6503,14 +6236,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x2_t, ''] + - [float32x4_t, float32x2_t, 'q'] compose: - FnCall: [static_assert!, ['LANE == 0']] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] @@ -6528,14 +6260,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x4_t, ''] + - [float16x8_t, float16x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: - - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] - name: "vcmla{neon_type[0].rot270_lane}" @@ -6550,11 +6281,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x2_t, float32x2_t, ''] + - [float32x4_t, float32x2_t, 'q'] compose: - FnCall: [static_assert!, ['LANE == 0']] - - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - name: "vcmla{neon_type[0].rot270_lane}" @@ -6571,11 +6304,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] - - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float16x4_t, float16x4_t, ''] + - [float16x8_t, float16x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 1]] - - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}] + - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}] + - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}] - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] - name: "vmax{neon_type.no}" @@ -6869,7 +6604,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, "f32"] @@ -6887,7 +6622,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmv]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x4_t, "f32"] @@ -6902,36 +6637,36 @@ intrinsics: doc: Vector move arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" - attr: [*neon-stable] - assert_instr: [sxtl2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sxtl2]]}]] safety: safe types: - - [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]'] - - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]'] + - [int8x16_t, int16x8_t] + - [int16x8_t, int32x4_t] + - [int32x4_t, int64x2_t] compose: - Let: - a - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + - FnCall: ['vget_high_{neon_type[0]}', [a]] - FnCall: ["vmovl{neon_type[0].noq}", [a]] - name: "vmovl_high{neon_type[0].noq}" doc: Vector move arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" - attr: [*neon-stable] - assert_instr: [uxtl2] + attr: + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uxtl2]]}]] safety: safe types: - - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]'] - - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]'] + - [uint8x16_t, uint16x8_t] + - [uint16x8_t, uint32x4_t] + - [uint32x4_t, uint64x2_t] compose: - Let: - a - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + - FnCall: ['vget_high_{neon_type[0]}', [a]] - FnCall: ["vmovl{neon_type[0].noq}", [a]] - name: "vpadd{neon_type[0].no}" @@ -6941,6 +6676,7 @@ intrinsics: attr: [*neon-stable] assert_instr: [faddp] safety: safe + big_endian_inverse: true types: - [float32x4_t, "4"] - [float64x2_t, "2"] @@ -6963,6 +6699,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [faddp] safety: safe + big_endian_inverse: true types: - [float16x8_t, "8"] compose: @@ -6984,6 +6721,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmaxp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -7005,6 +6743,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmaxnmp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -7026,6 +6765,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fminp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -7047,6 +6787,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fminnmp] safety: safe + big_endian_inverse: true types: - float16x4_t - float16x8_t @@ -7072,11 +6813,11 @@ intrinsics: - Let: - a1 - "{type[2]}" - - FnCall: [simd_extract!, [a, '0']] + - FnCall: ['vget{neon_type[1].lane_nox}', [a], [0]] - Let: - a2 - "{type[2]}" - - FnCall: [simd_extract!, [a, '1']] + - FnCall: ['vget{neon_type[1].lane_nox}', [a], [1]] - Identifier: ['a1 + a2', Symbol] - name: "vpmin{type[0]}" @@ -7086,6 +6827,7 @@ intrinsics: attr: [*neon-stable] assert_instr: [fminp] safety: safe + big_endian_inverse: true types: - ["s_f32", float32x2_t, f32] - ["qd_f64", float64x2_t, f64] @@ -7102,14 +6844,14 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i16", "i32"] compose: - Let: [a, int16x4_t, {FnCall: [vdup_n_s16, [a]]}] - Let: [b, int16x4_t, {FnCall: [vdup_n_s16, [b]]}] - - FnCall: [simd_extract!, [{FnCall: [vqdmull_s16, [a, b]]}, '0']] + - FnCall: ['vgetq_lane_{type[1]}', [{FnCall: [vqdmull_s16, [a, b]]}], ['0']] - name: "vqdmulls_s32" doc: "Signed saturating doubling multiply long" @@ -7117,7 +6859,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i32", "i64"] @@ -7133,15 +6875,15 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]] + - *neon-stable safety: safe types: - - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]'] - - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]'] + - [int16x8_t, int32x4_t] + - [int32x4_t, int64x2_t] compose: - - Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}] - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}] - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] - name: "vqdmull_high_n_{type[1]}" @@ -7149,15 +6891,15 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]] + - *neon-stable safety: safe types: - - [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]'] - - [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]'] + - [int16x8_t, "i16", int32x4_t] + - [int32x4_t, "i32", int64x2_t] compose: - - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}] - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] - name: "vqdmull{type[3]}" @@ -7167,7 +6909,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7175,7 +6917,7 @@ intrinsics: - ["i32", int32x4_t, "i64", 's_laneq_s32', 's_s32'] compose: - FnCall: [static_assert_uimm_bits!, [N, 2]] - - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}] - FnCall: ["vqdmull{type[4]}", [a, b]] - name: "vqdmullh_laneq_s16" @@ -7185,14 +6927,14 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, N = 4]]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - ["i16", int16x8_t, "i32"] compose: - FnCall: [static_assert_uimm_bits!, [N, 3]] - - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}] - FnCall: ["vqdmullh_s16", [a, b]] - name: "vqdmulls_lane_s32" @@ -7202,33 +6944,33 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - ["i32", int32x2_t, "i64"] compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}] - FnCall: ["vqdmulls_s32", [a, b]] - - name: "vqdmull{type[6]}" + - name: "vqdmull{type[3]}" doc: "Signed saturating doubling multiply long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16'] - - [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32'] + - [int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16'] + - [int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32'] compose: - FnCall: [static_assert_uimm_bits!, [N, '2']] - - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}] - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] - name: "vqdmull_high_lane_s32" @@ -7236,17 +6978,17 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]'] + - [int32x4_t, int32x2_t, int64x2_t] compose: - FnCall: [static_assert_uimm_bits!, [N, '1']] - - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}] - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] - name: "vqdmull_high_laneq_s16" @@ -7254,17 +6996,17 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]'] + - [int16x8_t, int16x8_t, int32x4_t] compose: - FnCall: [static_assert_uimm_bits!, [N, '3']] - - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}] - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] - name: "vqdmull_laneq_s16" @@ -7274,14 +7016,14 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 4']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]'] + - [int16x4_t, int16x8_t, int32x4_t] compose: - FnCall: [static_assert_uimm_bits!, [N, '3']] - - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}] - FnCall: [vqdmull_s16, [a, b]] - name: "vqdmull_laneq_s32" @@ -7291,14 +7033,14 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]'] + - [int32x2_t, int32x4_t, int64x2_t] compose: - FnCall: [static_assert_uimm_bits!, [N, '2']] - - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}] - FnCall: [vqdmull_s32, [a, b]] - name: "vqdmlal{type[4]}" @@ -7306,8 +7048,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2]]}]] + - *neon-stable safety: safe types: - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16] @@ -7322,9 +7064,9 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7341,14 +7083,14 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal]]}]] + - *neon-stable safety: safe types: - ["i32", "i16", "s16"] compose: - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}] - - FnCall: [vqadds_s32, [a, {FnCall: [simd_extract!, [x, 0]]}]] + - FnCall: [vqadds_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]] - name: "vqdmlals_s32" doc: "Signed saturating doubling multiply-add long" @@ -7356,7 +7098,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "i32", "i32", "i64"] @@ -7369,9 +7111,9 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -7381,16 +7123,16 @@ intrinsics: - ["i64", "i32", int32x4_t, "i64", s_laneq_s32, '2', s_s32] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] - - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]] - name: "vqdmlal_laneq_s16" doc: "Vector widening saturating doubling multiply accumulate with scalar" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7404,9 +7146,9 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7420,8 +7162,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2]]}]] + - *neon-stable safety: safe types: - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16] @@ -7436,9 +7178,9 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7455,14 +7197,14 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl]]}]] + - *neon-stable safety: safe types: - ["i32", "i16"] compose: - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}] - - FnCall: [vqsubs_s32, [a, {FnCall: [simd_extract!, [x, '0']]}]] + - FnCall: [vqsubs_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]] - name: "vqdmlsls_s32" doc: "Signed saturating doubling multiply-subtract long" @@ -7470,7 +7212,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "i32", "i32", "i64"] @@ -7483,9 +7225,9 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -7495,16 +7237,16 @@ intrinsics: - ["i64", "i32", int32x4_t, "i64", 's_laneq_s32', '2', 's_s32'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] - - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]] - name: "vqdmlsl_laneq_s16" doc: "Vector widening saturating doubling multiply subtract with scalar" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7518,9 +7260,9 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7535,7 +7277,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i16", "i16", "i16", int16x4_t, 'h_s16'] @@ -7543,7 +7285,7 @@ intrinsics: compose: - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [a]]}] - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [b]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}, '0']] + - FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}], ['0']] - name: "vqdmulhh{type[3]}" doc: "Signed saturating doubling multiply returning high half" @@ -7552,7 +7294,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7560,7 +7302,7 @@ intrinsics: - ["i16", int16x8_t, "i16", '_laneq_s16', '3'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]] - - Let: [b, 'i16', {FnCall: [simd_extract!, [b, 'N as u32']]}] + - Let: [b, 'i16', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}] - FnCall: ['vqdmulhh_s16', [a, b]] - name: "vqdmulhs{type[3]}" @@ -7570,7 +7312,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7578,7 +7320,7 @@ intrinsics: - ["i32", int32x4_t, "i32", "_laneq_s32", '2'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]] - - Let: [b, 'i32', {FnCall: [simd_extract!, [b, 'N as u32']]}] + - Let: [b, 'i32', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}] - FnCall: ['vqdmulhs_s32', [a, b]] - name: "vqmovn_high{neon_type[1].noq}" @@ -7586,30 +7328,30 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtn2]]}]] + - *neon-stable safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t] + - [int16x4_t, int32x4_t, int16x8_t] + - [int32x2_t, int64x2_t, int32x4_t] compose: - - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]] - name: "vqmovn_high{neon_type[1].noq}" doc: "Signed saturating extract narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqxtn2]]}]] + - *neon-stable safety: safe types: - - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t] + - [uint16x4_t, uint32x4_t, uint16x8_t] + - [uint32x2_t, uint64x2_t, uint32x4_t] compose: - - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]] - name: "vqmovn{type[2]}" doc: "Saturating extract narrow" @@ -7617,13 +7359,13 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i16", "i8", 'h_s16', s16] - ["i32", "i16", 's_s32', s32] compose: - - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']] - name: "vqmovn{type[2]}" doc: "Saturating extract narrow" @@ -7631,13 +7373,13 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u16", "u8", 'h_u16', 'u16'] - ["u32", "u16", 's_u32', 'u32'] compose: - - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']] - name: "vqmovnd_s64" doc: "Saturating extract narrow" @@ -7645,7 +7387,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "i32"] @@ -7662,7 +7404,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u64", "u32"] @@ -7679,29 +7421,29 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i16", "u8", 'h_s16', s16] - ["i32", "u16", 's_s32', s32] - ["i64", "u32", 'd_s64', s64] compose: - - FnCall: [simd_extract!, [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']] - name: "vqmovun_high_{neon_type[1]}" doc: "Signed saturating extract unsigned narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtun2]]}]] + - *neon-stable safety: safe types: - - [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, int32x4_t, uint16x8_t, s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, int64x2_t, uint32x4_t, s64, '[0, 1, 2, 3]'] + - [uint8x8_t, int16x8_t, uint8x16_t, s16] + - [uint16x4_t, int32x4_t, uint16x8_t, s32] + - [uint32x2_t, int64x2_t, uint32x4_t, s64] compose: - - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovun_{type[3]}", [b]]}, "{type[4]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovun_{type[3]}", [b]]}]] - name: "vqrdmulh{type[1]}" doc: "Signed saturating rounding doubling multiply returning high half" @@ -7709,13 +7451,13 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i16", 'h_s16', 's16'] - ["i32", 's_s32', 's32'] compose: - - FnCall: [simd_extract!, [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}, '0']] + - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}], ['0']] - name: "vqrdmulh{type[2]}" doc: "Signed saturating rounding doubling multiply returning high half" @@ -7724,7 +7466,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh, LANE = 1]]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -7734,7 +7476,7 @@ intrinsics: - ["i32", int32x4_t, 's_laneq_s32', 's_s32', '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] - - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]] - name: "vqrdmlah{neon_type.no}" doc: "Signed saturating rounding doubling multiply accumulate returning high half" @@ -7773,7 +7515,7 @@ intrinsics: - Let: [a, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [a]]}] - Let: [b, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [b]]}] - Let: [c, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [c]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}, '0']] + - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}], ['0']] - name: "vqrdmlah{type[0]}" doc: "Signed saturating rounding doubling multiply accumulate returning high half" @@ -7787,17 +7529,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]'] - - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]'] - - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] - - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] - - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]'] - - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]'] - - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] - - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] - - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] + - Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}] - FnCall: ["vqrdmlah{neon_type[2].no}", [a, b, c]] - name: "vqrdmlah{type[4]}" @@ -7818,7 +7560,7 @@ intrinsics: - ["i32", int32x4_t, '2', "s_s32", s_laneq_s32, s_s32] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] - - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] - name: "vqrdmlsh{neon_type.no}" doc: "Signed saturating rounding doubling multiply subtract returning high half" @@ -7857,7 +7599,7 @@ intrinsics: - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}] - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}] - Let: [c, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [c]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}, '0']] + - FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}], ['0']] - name: "vqrdmlsh{type[0]}" doc: "Signed saturating rounding doubling multiply subtract returning high half" @@ -7871,17 +7613,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]'] - - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]'] - - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] - - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] - - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]'] - - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]'] - - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] - - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] - - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] + - Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}] - FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]] - name: "vqrdmlsh{type[3]}" @@ -7902,7 +7644,7 @@ intrinsics: - ["i32", int32x4_t, '2', s_laneq_s32, s_s32] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] - - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] - name: "vqrshl{type[0]}" doc: "Signed saturating rounding shift left" @@ -7910,7 +7652,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ['s_s32', "i32"] @@ -7928,7 +7670,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i8", 'b_s8', int8x8_t, s8] @@ -7936,7 +7678,7 @@ intrinsics: compose: - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}] - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[3]}", [a, b]]}, '0']] + - FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrshl_{type[3]}", [a, b]]}], ['0']] - name: "vqrshl{type[2]}" doc: "Unsigned signed saturating rounding shift left" @@ -7944,7 +7686,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u32", "i32", 's_u32'] @@ -7962,7 +7704,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u8", "i8", "b_u8", uint8x8_t, int8x8_t, s8] @@ -7970,7 +7712,7 @@ intrinsics: compose: - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n_{type[0]}", [a]]}] - Let: [b, "{neon_type[4]}", {FnCall: ["vdup_n_{type[5]}", [b]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[0]}", [a, b]]}, '0']] + - FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqrshl_{type[0]}", [a, b]]}], ['0']] - name: "vqrshrn{type[2]}" doc: "Signed saturating rounded shift right narrow" @@ -7979,7 +7721,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -7989,25 +7731,25 @@ intrinsics: compose: - FnCall: [static_assert!, ["{type[3]}"]] - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqrshrn_n{neon_type[4].noq}::", [a]]}, '0']] + - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqrshrn_n{neon_type[4].noq}::", [a]]}], ['0']] - name: "vqrshrn{type[3]}" doc: "Signed saturating rounded shift right narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8'] - - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16'] - - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32'] + - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', 'N >= 1 && N <= 32'] compose: - - FnCall: [static_assert!, ["{type[5]}"]] - - FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::", [b]]}, "{type[4]}"]] + - FnCall: [static_assert!, ["{type[4]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::", [b]]}]] - name: "vqrshrn{type[0]}" doc: "Unsigned saturating rounded shift right narrow" @@ -8016,7 +7758,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8026,31 +7768,30 @@ intrinsics: compose: - FnCall: [static_assert!, ['{type[3]}']] - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}] - - FnCall: [simd_extract!, [{FnCall: ["vqrshrn{type[6]}::", [a]]}, '0']] + - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrshrn{type[6]}::", [a]]}], ['0']] - name: "vqrshrn_high_n{neon_type[1].noq}" doc: "Unsigned saturating rounded shift right narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ['{type[3]}']] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: - "vqrshrn_n{neon_type[1].noq}::" - - b - - "{type[4]}" - name: "vqrshrun{type[0]}" doc: "Signed saturating rounded shift right unsigned narrow" @@ -8059,7 +7800,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8073,35 +7814,34 @@ intrinsics: - "{neon_type[4]}" - FnCall: ["vdupq_n_{type[5]}", [a]] - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vqrshrun_n_{type[5]}::" - - a - - '0' + - - '0' - name: "vqrshrun_high_n{neon_type[1].noq}" doc: "Signed saturating rounded shift right unsigned narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]'] + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: - - "vqrshrun_n_{type[4]}::" + - "vqrshrun_n_{neon_type[1]}::" - - b - - "{type[5]}" - name: "vqshld_{type}" doc: "Signed saturating shift left" @@ -8109,7 +7849,7 @@ intrinsics: return_type: "{type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - i64 @@ -8126,7 +7866,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [b_s8, "i8", int8x8_t] @@ -8140,7 +7880,7 @@ intrinsics: - "vqshl{neon_type[2].noq}" - - FnCall: ["vdup_n{neon_type[2].no}", [a]] - FnCall: ["vdup_n{neon_type[2].no}", [b]] - - FnCall: [simd_extract!, [c, '0']] + - FnCall: ['vget{neon_type[2].lane_nox}', [c], ['0']] - name: "vqshl{type[0]}" doc: "Signed saturating shift left" @@ -8149,7 +7889,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8160,11 +7900,11 @@ intrinsics: compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vqshl_n_{type[3]}::" - - FnCall: ["vdup_n_{type[3]}", [a]] - - '0' + - - '0' - name: "vqshld_{type[0]}" doc: "Unsigned saturating shift left" @@ -8172,7 +7912,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u64", "i64"] @@ -8189,7 +7929,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [b_u8, "u8", "i8", uint8x8_t, int8x8_t] @@ -8203,7 +7943,7 @@ intrinsics: - "vqshl{neon_type[3].noq}" - - FnCall: ["vdup{neon_type[3].N}", [a]] - FnCall: ["vdup{neon_type[4].N}", [b]] - - FnCall: [simd_extract!, [c, '0']] + - FnCall: ['vget{neon_type[3].lane_nox}', [c], ['0']] - name: "vqshl{type[0]}" doc: "Unsigned saturating shift left" @@ -8212,7 +7952,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8223,9 +7963,9 @@ intrinsics: compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: ["vqshl_n_{type[1]}::", [{FnCall: ["vdup_n_{type[1]}", [a]]}]] - - '0' + - - '0' - name: "vqshrnd_n_s64" doc: "Signed saturating shift right narrow" @@ -8234,7 +7974,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8258,7 +7998,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8267,33 +8007,32 @@ intrinsics: compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vqshrn_n_{type[4]}::" - - FnCall: ["vdupq_n_{type[4]}", [a]] - - '0' + - - '0' - name: "vqshrn{type[0]}" doc: "Signed saturating shift right narrow" arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[3]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16] - - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32] - - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64] + - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8'] + - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16'] + - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[4]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[1]}' - - a - - FnCall: ["vqshrn_n_{type[6]}::", [b]] - - "{type[5]}" + - FnCall: ["vqshrn_n_{neon_type[2]}::", [b]] - name: "vqshrnd_n_u64" doc: "Unsigned saturating shift right narrow" @@ -8302,7 +8041,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8326,7 +8065,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8335,33 +8074,32 @@ intrinsics: compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - "simd_extract!" + - 'vget_lane_{type[2]}' - - FnCall: - "vqshrn_n_{type[1]}::" - - FnCall: ["vdupq_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vqshrn{type[0]}" doc: "Unsigned saturating shift right narrow" arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[3]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[4]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[1]}' - - a - FnCall: ["vqshrn_n_{neon_type[2]}::", [b]] - - "{type[5]}" - name: "vqshrun{type[0]}" doc: "Signed saturating shift right unsigned narrow" @@ -8370,7 +8108,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8380,33 +8118,32 @@ intrinsics: compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vqshrun_n_{type[4]}::" - - FnCall: ["vdupq_n_{type[4]}", [a]] - - '0' + - - '0' - name: "vqshrun_high_n_{neon_type[1]}" doc: "Signed saturating shift right unsigned narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: ["vqshrun_n_{neon_type[1]}::", [b]] - - "{type[4]}" - name: "vsqadd{type[0]}" doc: "Unsigned saturating accumulate of signed value" @@ -8414,19 +8151,19 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [b_u8, "u8", "i8", s8] - [h_u16, "u16", "i16", s16] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vsqadd_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[2]}", [b]] - - '0' + - - '0' - name: "vsqadd{type[0]}" doc: "Unsigned saturating accumulate of signed value" @@ -8434,7 +8171,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [s_u32, "u32", "i32"] @@ -8452,7 +8189,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float32x2_t @@ -8499,7 +8236,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [_f64, float64x1_t, v1f64] @@ -8517,7 +8254,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [s_f32, "f32"] @@ -8556,7 +8293,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [_f64, float64x1_t, v1f64] @@ -8574,7 +8311,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [s_f32, "f32"] @@ -8613,7 +8350,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [_f64, float64x1_t, v1f64] @@ -8631,7 +8368,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [s_f32, "f32"] @@ -8670,7 +8407,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [s_f32, "f32"] @@ -8702,7 +8439,6 @@ intrinsics: - link: "llvm.aarch64.neon.frecpx.{type[1]}" arch: aarch64,arm64ec - - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -8719,54 +8455,65 @@ intrinsics: - [poly64x2_t, uint64x2_t] - [int64x2_t, poly64x2_t] - [uint64x2_t, poly64x2_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + - [float64x1_t, poly64x1_t] + - [float64x2_t, poly64x2_t] + - [int64x1_t, float64x1_t] + - [int64x2_t, float64x2_t] + - [uint64x1_t, float64x1_t] + - [uint64x2_t, float64x2_t] + - [poly64x1_t, float64x1_t] + - [poly64x2_t, float64x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: safe + types: - [float64x1_t, int8x8_t] - [float64x1_t, int16x4_t] - [float64x1_t, int32x2_t] - - [float64x1_t, int64x1_t] - [float64x2_t, int8x16_t] - [float64x2_t, int16x8_t] - [float64x2_t, int32x4_t] - - [float64x2_t, int64x2_t] - [float64x1_t, uint8x8_t] - [float64x1_t, uint16x4_t] - [float64x1_t, uint32x2_t] - - [float64x1_t, uint64x1_t] - [float64x2_t, uint8x16_t] - [float64x2_t, uint16x8_t] - [float64x2_t, uint32x4_t] - - [float64x2_t, uint64x2_t] - [float64x1_t, poly8x8_t] - [float64x1_t, poly16x4_t] - [float32x2_t, poly64x1_t] - - [float64x1_t, poly64x1_t] - [float64x2_t, poly8x16_t] - [float64x2_t, poly16x8_t] - [float32x4_t, poly64x2_t] - - [float64x2_t, poly64x2_t] - [float64x2_t, p128] - [int8x8_t, float64x1_t] - [int16x4_t, float64x1_t] - [int32x2_t, float64x1_t] - - [int64x1_t, float64x1_t] - [int8x16_t, float64x2_t] - [int16x8_t, float64x2_t] - [int32x4_t, float64x2_t] - - [int64x2_t, float64x2_t] - [poly8x8_t, float64x1_t] - [uint16x4_t, float64x1_t] - [uint32x2_t, float64x1_t] - - [uint64x1_t, float64x1_t] - [poly8x16_t, float64x2_t] - [uint16x8_t, float64x2_t] - [uint32x4_t, float64x2_t] - - [uint64x2_t, float64x2_t] - [uint8x8_t, float64x1_t] - [poly16x4_t, float64x1_t] - - [poly64x1_t, float64x1_t] - [poly64x1_t, float32x2_t] - [uint8x16_t, float64x2_t] - [poly16x8_t, float64x2_t] - - [poly64x2_t, float64x2_t] - [poly64x2_t, float32x4_t] - [p128, float64x2_t] - [float32x2_t, float64x1_t] @@ -8802,7 +8549,7 @@ intrinsics: return_type: "{type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - "i64" @@ -8819,7 +8566,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u64", "i64"] @@ -8837,7 +8584,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8853,7 +8600,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -8867,197 +8614,262 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: ["vrshrn_n_{neon_type[1]}::", [b]] - - "{type[4]}" - name: "vrsubhn_high_{neon_type[1]}" doc: "Rounding subtract returning high narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[3]}" attr: - - *little-endian + - *cfg-little-endian - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int16x8_t, int8x16_t] + - [int16x4_t, int32x4_t, int32x4_t, int16x8_t] + - [int32x2_t, int64x2_t, int64x2_t, int32x4_t] + - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t] + - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t] + - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t] compose: - - Let: - - x - - "{neon_type[0]}" - - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]] - - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]] - name: "vrsubhn_high_{neon_type[1]}" doc: "Rounding subtract returning high narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[3]}" attr: - - *big-endian + - *cfg-big-endian - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int16x8_t, int8x16_t] + - [int16x4_t, int32x4_t, int32x4_t, int16x8_t] + - [int32x2_t, int64x2_t, int64x2_t, int32x4_t] + - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t] + - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t] + - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t] compose: - - Let: - - x - - "{neon_type[0]}" - - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]] - - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]] - name: "vcopy{neon_type[0].lane_nox}" doc: "Insert vector element from another vector element" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1', '3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe types: - - [int8x8_t, int8x8_t, int8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int16x4_t, int16x4_t, int16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int32x2_t, int32x2_t, int32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] + - [int8x8_t, int8x8_t, int8x8_t, '3', '3'] + - [int16x4_t, int16x4_t, int16x4_t, '2', '2'] + - [int32x2_t, int32x2_t, int32x2_t, '1', '1'] + - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3'] + - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2'] + - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1'] + - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3'] + - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2'] + - [float32x2_t, float32x2_t, float32x2_t, '1', '1'] compose: - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] - - Identifier: ["{type[5]}", UnsafeSymbol] + - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]] - name: "vcopy{neon_type[0].lane_nox}" doc: "Insert vector element from another vector element" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - *neon-stable + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int8x16_t, int8x8_t, int8x16_t, '4', '3'] + - [int16x8_t, int16x4_t, int16x8_t, '3', '2'] + - [int32x4_t, int32x2_t, int32x4_t, '2', '1'] + - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3'] + - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2'] + - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1'] + - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3'] + - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] + - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] + - Let: [b, '{neon_type[2]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}] + - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE2]]}, a], [LANE1]] + + - name: "vcopy_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["_a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[1]}', 'LANE2 = {type[1]}']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - *neon-stable + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [float64x1_t, '0', 'b'] + - [poly64x1_t, '0', 'b'] + - [uint64x1_t, '0', 'b'] + - [int64x1_t, '0', 'b'] + compose: + - FnCall: [static_assert!, ['LANE1 == {type[1]}']] + - FnCall: [static_assert!, ['LANE2 == {type[1]}']] + - Identifier: ["{type[2]}", Symbol] + + - name: "vcopy_laneq_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["_a: {neon_type[1]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[2]}', 'LANE2 = {type[3]}']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - *neon-stable + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [float64x2_t, float64x1_t, '0', '1'] + - [poly64x2_t, poly64x1_t, '0', '1'] + - [uint64x2_t, uint64x1_t, '0', '1'] + - [int64x2_t, int64x1_t, '0', '1'] + compose: + - FnCall: [static_assert!, ['LANE1 == {type[2]}']] + - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[3]}']] + - FnCall: [transmute, [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}]] + + - name: "vcopy{neon_type[0].laneq_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1', '3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe types: - - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] + - [int8x16_t, int8x16_t, int8x16_t, '4', '4'] + - [int16x8_t, int16x8_t, int16x8_t, '3', '3'] + - [int32x4_t, int32x4_t, int32x4_t, '2', '2'] + - [int64x2_t, int64x2_t, int64x2_t, '1', '1'] + - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4'] + - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3'] + - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2'] + - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1'] + - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4'] + - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3'] + - [float32x4_t, float32x4_t, float32x4_t, '2', '2'] + - [float64x2_t, float64x2_t, float64x2_t, '1', '1'] compose: - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] - - Identifier: ["{type[5]}", UnsafeSymbol] - - Identifier: ["{type[6]}", UnsafeSymbol] + - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]] - name: "vcopy{neon_type[0].laneq_nox}" doc: "Insert vector element from another vector element" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1', '3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe + big_endian_inverse: true types: - - [int8x16_t, int8x16_t, int8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int16x8_t, int16x8_t, int16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int32x4_t, int32x4_t, int32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int64x2_t, int64x2_t, int64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] + - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1'] compose: - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] - - Identifier: ["{type[5]}", UnsafeSymbol] + - FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]] - name: "vcopy{neon_type[0].laneq_nox}" doc: "Insert vector element from another vector element" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1', '3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe types: - - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] + - [int8x8_t, int8x16_t, int8x8_t, '3', '4'] + - [int16x4_t, int16x8_t, int16x4_t, '2', '3'] + - [int32x2_t, int32x4_t, int32x2_t, '1', '2'] + - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4'] + - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3'] + - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2'] + - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4'] + - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3'] + - [float32x2_t, float32x4_t, float32x2_t, '1', '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']] - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']] - - Identifier: ["{type[5]}", UnsafeSymbol] - - Identifier: ["{type[6]}", UnsafeSymbol] + - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]] - name: "vcopyq_lane_{neon_type[0]}" doc: "Insert vector element from another vector element" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1', '3']] + - *neon-stable + static_defs: ['const LANE1: i32, const LANE2: i32'] + safety: safe + types: + - [int64x2_t, int64x1_t] + - [uint64x2_t, uint64x1_t] + - [float64x2_t, float64x1_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE1, '1']] + - FnCall: [static_assert!, ['LANE2 == 0']] + - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}] + - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}, a], [LANE1]] + + - name: "vcopyq_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1', '3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe + big_endian_inverse: true types: - - [int64x2_t, int64x1_t, ' let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [uint64x2_t, uint64x1_t, ' let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [poly64x2_t, poly64x1_t, ' let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] - - [float64x2_t, float64x1_t, ' let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }'] + - [poly64x2_t, poly64x1_t] compose: - FnCall: [static_assert_uimm_bits!, [LANE1, '1']] - FnCall: [static_assert!, ['LANE2 == 0']] - - Identifier: ['{type[2]}', UnsafeSymbol] - - Identifier: ['{type[3]}', UnsafeSymbol] + - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}] + - FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]] - name: "vcopyq_lane_f32" doc: "Insert vector element from another vector element" @@ -9066,16 +8878,15 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['1', '3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE1: i32, const LANE2: i32'] safety: safe types: - - [float32x4_t, float32x2_t, ' let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }'] + - [float32x4_t, float32x2_t] compose: - FnCall: [static_assert_uimm_bits!, [LANE1, 2]] - FnCall: [static_assert_uimm_bits!, [LANE2, 1]] - - Identifier: ["{type[2]}", UnsafeSymbol] - - Identifier: ["{type[3]}", UnsafeSymbol] + - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]] - name: "vcreate_f64" doc: "Insert vector element from another vector element" @@ -9083,7 +8894,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u64", float64x1_t] @@ -9097,9 +8908,10 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["f64", float64x1_t, float64x1_t] compose: @@ -9113,9 +8925,10 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["f64", float64x2_t, float64x2_t] compose: @@ -9128,7 +8941,7 @@ intrinsics: return_type: "{type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - "i64" @@ -9146,7 +8959,7 @@ intrinsics: return_type: "{type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["u64", "i64"] @@ -9163,63 +8976,62 @@ intrinsics: arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]'] - - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]'] + - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8'] + - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16'] + - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - FnCall: ["vshll_n_{neon_type[2]}::", [b]] + - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]] - name: "vshll_high_n_{neon_type[0]}" doc: "Signed shift left long" arguments: ["a: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]'] - - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]'] + - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8'] + - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16'] + - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - FnCall: ["vshll_n_{neon_type[2]}::", [b]] + - Let: [b, "{neon_type[2]}", {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]] - name: "vshrn_high_n_{neon_type[1]}" doc: "Shift right narrow" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - - FnCall: ["vshrn_n_{neon_type[1]}::", [b]] - - "{type[4]}" + - FnCall: ["vshrn_n_{neon_type[1]}", [b], [N]] - name: "vsm3partw1{neon_type.no}" doc: "SM3PARTW1" @@ -9230,6 +9042,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw1]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9248,6 +9061,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw2]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9266,6 +9080,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3ss1]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9284,6 +9099,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4ekey]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9302,6 +9118,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4e]]}]] - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] safety: safe + big_endian_inverse: true types: - uint32x4_t compose: @@ -9338,6 +9155,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9356,6 +9174,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h2]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9374,6 +9193,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su0]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9392,6 +9212,7 @@ intrinsics: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su1]]}]] - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] safety: safe + big_endian_inverse: true types: - uint64x2_t compose: @@ -9412,6 +9233,7 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']] static_defs: ["const IMM2: i32"] safety: safe + big_endian_inverse: true types: - ['1aq_u32', uint32x4_t, 'sm3tt1a', 'SM3TT1A'] - ['1bq_u32', uint32x4_t, 'sm3tt1b', 'SM3TT1B'] @@ -9514,7 +9336,7 @@ intrinsics: - transmute - - FnCall: - _vrnd32x_f64 - - - FnCall: [simd_extract!, [a, 0]] + - - FnCall: ['vget{neon_type.lane_nox}', [a], [0]] - name: "vrnd32z{neon_type.no}" doc: "Floating-point round to 32-bit integer toward zero" @@ -9558,7 +9380,7 @@ intrinsics: arch: aarch64,arm64ec - FnCall: - transmute - - - FnCall: [_vrnd32z_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + - - FnCall: [_vrnd32z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]] - name: "vrnd64x{neon_type.no}" doc: "Floating-point round to 64-bit integer, using current rounding mode" @@ -9602,7 +9424,7 @@ intrinsics: arch: aarch64,arm64ec - FnCall: - transmute - - - FnCall: [_vrnd64x_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + - - FnCall: [_vrnd64x_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]] - name: "vrnd64z{neon_type.no}" doc: "Floating-point round to 64-bit integer toward zero" @@ -9646,7 +9468,7 @@ intrinsics: arch: aarch64,arm64ec - FnCall: - transmute - - - FnCall: [_vrnd64z_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + - - FnCall: [_vrnd64z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]] - name: "vtrn1{neon_type[0].no}" doc: Transpose vectors @@ -9654,8 +9476,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] - [int8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] @@ -9684,8 +9507,9 @@ intrinsics: - *neon-fp16 - *neon-stable-fp16 - *target-not-arm64ec - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[0, 4, 2, 6]'] - [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] @@ -9698,8 +9522,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[0, 2]'] - [int64x2_t, '[0, 2]'] @@ -9717,8 +9542,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] - [int8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] @@ -9746,8 +9572,9 @@ intrinsics: - *neon-fp16 - *neon-stable-fp16 - *target-not-arm64ec - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[1, 5, 3, 7]'] - [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] @@ -9760,8 +9587,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[1, 3]'] - [int64x2_t, '[1, 3]'] @@ -9779,8 +9607,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] - [int8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] @@ -9815,8 +9644,9 @@ intrinsics: - *neon-fp16 - *neon-stable-fp16 - *target-not-arm64ec - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[2, 6, 3, 7]'] - [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] @@ -9829,8 +9659,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] - [int8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] @@ -9866,8 +9697,9 @@ intrinsics: - *neon-fp16 - *neon-stable-fp16 - *target-not-arm64ec - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[0, 4, 1, 5]'] - [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] @@ -9880,8 +9712,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[0, 2]'] - [int64x2_t, '[0, 2]'] @@ -9899,8 +9732,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] - [int8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] @@ -9928,8 +9762,9 @@ intrinsics: - *neon-fp16 - *neon-stable-fp16 - *target-not-arm64ec - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[0, 2, 4, 6]'] - [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] @@ -9942,8 +9777,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]] safety: safe + big_endian_inverse: true types: - [int32x2_t, '[1, 3]'] - [int64x2_t, '[1, 3]'] @@ -9961,8 +9797,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]] safety: safe + big_endian_inverse: true types: - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] - [int8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] @@ -9994,8 +9831,9 @@ intrinsics: - *neon-fp16 - *neon-stable-fp16 - *target-not-arm64ec - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]] safety: safe + big_endian_inverse: true types: - [float16x4_t, '[1, 3, 5, 7]'] - [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] @@ -10011,23 +9849,21 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]] + - *neon-stable + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uabal2]]}]] safety: safe types: - - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]', '[2, 3]'] + - [uint16x8_t, uint8x16_t, uint8x8_t] + - [uint32x4_t, uint16x8_t, uint16x4_t] + - [uint64x2_t, uint32x4_t, uint32x2_t] compose: - Let: - d - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - FnCall: ['vget_high_{neon_type[1]}', [b]] - Let: - e - - "{neon_type[2]}" - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] - - Let: [f, "{neon_type[2]}", {FnCall: ["vabd_{neon_type[2]}", [d, e]]}] + - FnCall: ['vget_high_{neon_type[1]}', [c]] + - Let: [f, {FnCall: ["vabd_{neon_type[2]}", [d, e]]}] - FnCall: - simd_add - - a @@ -10039,39 +9875,28 @@ intrinsics: return_type: "{neon_type[0]}" attr: - *neon-stable - - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]] + - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [sabal2]]}]] safety: safe types: - - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t] - - [int32x4_t, int16x8_t, int16x8_t, '[4, 5, 6, 7]', int16x4_t, uint16x4_t] - - [int64x2_t, int32x4_t, int32x4_t, '[2, 3]', int32x2_t, uint32x2_t] + - [int16x8_t, int8x16_t, int8x16_t, int8x8_t, uint8x8_t] + - [int32x4_t, int16x8_t, int16x8_t, int16x4_t, uint16x4_t] + - [int64x2_t, int32x4_t, int32x4_t, int32x2_t, uint32x2_t] compose: - Let: - d - - "{neon_type[4]}" - - FnCall: - - simd_shuffle! - - - b - - b - - "{type[3]}" + - FnCall: ['vget_high_{neon_type[1]}', [b]] - Let: - e - - "{neon_type[4]}" - - FnCall: - - simd_shuffle! - - - c - - c - - "{type[3]}" + - FnCall: ['vget_high_{neon_type[2]}', [c]] - Let: - f - - "{neon_type[4]}" - FnCall: - - "vabd{neon_type[4].no}" + - "vabd{neon_type[3].no}" - - d - e - Let: - f - - "{neon_type[5]}" + - "{neon_type[4]}" - FnCall: - simd_cast - - f @@ -10164,8 +9989,9 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - float32x2_t - float64x2_t @@ -10182,7 +10008,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {neon_type[1]}"] attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -10205,7 +10031,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {neon_type[1]}"] attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -10229,7 +10055,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {neon_type[1]}"] attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -10256,7 +10082,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10271,7 +10097,7 @@ intrinsics: - "vfma{neon_type[0].no}" - - a - b - - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] - name: "vfma{type[3]}" @@ -10297,7 +10123,7 @@ intrinsics: - "vfma{neon_type[0].no}" - - a - b - - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] # vfms lane f16 @@ -10324,7 +10150,7 @@ intrinsics: - "vfms{neon_type[0].no}" - - a - b - - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] - name: "vfms{type[1]}" @@ -10350,7 +10176,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10361,16 +10187,16 @@ intrinsics: - "vfma{neon_type.no}" - - a - b - - FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]] - name: "vfma_laneq_f64" doc: "Floating-point fused multiply-add to accumulator" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10381,7 +10207,7 @@ intrinsics: - "vfma{neon_type[0].no}" - - a - b - - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] - name: "vfmaq_lane_f64" doc: "Floating-point fused multiply-add to accumulator" @@ -10390,7 +10216,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10401,16 +10227,16 @@ intrinsics: - "vfma{neon_type[0].no}" - - a - b - - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]] - name: "vfma{type[2]}" doc: "Floating-point fused multiply-add to accumulator" arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10419,7 +10245,7 @@ intrinsics: - ["f64", float64x2_t, "d_laneq_f64", '1'] compose: - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] - - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] + - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}] - FnCall: ["fma{type[0]}", [b, c, a]] - name: "vfmad_lane_f64" @@ -10429,14 +10255,14 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - ["f64", float64x1_t] compose: - FnCall: [static_assert!, ['LANE == 0']] - - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] + - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}] - FnCall: [fmaf64, [b, c, a]] @@ -10461,7 +10287,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-fp16 - *neon-unstable-f16 @@ -10473,7 +10299,7 @@ intrinsics: - ["f16", float16x8_t, 'q_f16', '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}] - FnCall: ["vfmah_{type[0]}", [a, b, c]] - name: "vfmsh_lane{type[2]}" @@ -10481,7 +10307,7 @@ intrinsics: arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-fp16 - *neon-unstable-f16 @@ -10493,7 +10319,7 @@ intrinsics: - ["f16", float16x8_t, 'q_f16', '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}] + - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}] - FnCall: ["vfmsh_{type[0]}", [a, b, c]] - name: "vfms_f64" @@ -10502,7 +10328,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float64x1_t @@ -10516,7 +10342,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float64x2_t @@ -10530,7 +10356,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float64x1_t @@ -10545,7 +10371,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10556,7 +10382,7 @@ intrinsics: - [float64x2_t, float64x2_t, '1', q_laneq_f64] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[2]}']] - - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]] - name: "vfms_lane_f64" doc: "Floating-point fused multiply-subtract to accumulator" @@ -10565,30 +10391,30 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - float64x1_t compose: - FnCall: [static_assert!, ['LANE == 0']] - - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]}]] - name: "vfms_laneq_f64" doc: "Floating-point fused multiply-subtract to accumulator" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - [float64x1_t, float64x2_t] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]] - name: "vfmsq_lane_f64" doc: "Floating-point fused multiply-subtract to accumulator" @@ -10597,23 +10423,23 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - [float64x2_t, float64x1_t] compose: - FnCall: [static_assert!, ['LANE == 0']] - - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]] - name: "vfms{type[2]}" doc: "Floating-point fused multiply-subtract to accumulator" arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] return_type: "{type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -10631,7 +10457,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t, 'f32x2', 'f32x2::new(0.0, 0.0)'] @@ -10665,18 +10491,18 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", "s_f32"] - ["f64", "u64", "d_f64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vceqz_{type[0]}" - - FnCall: ["vdup_n_{type[0]}", [a]] - - '0' + - - '0' - name: "vceqz{type[2]}" doc: "Floating-point compare bitwise equal to zero" @@ -10692,11 +10518,11 @@ intrinsics: - ["f16", "u16", "h_f16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vceqz_{type[0]}" - - FnCall: ["vdup_n_{type[0]}", [a]] - - '0' + - - '0' - name: "vceqzd_{type[2]}" doc: "Compare bitwise equal to zero" @@ -10704,7 +10530,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64", "s64"] @@ -10722,7 +10548,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] @@ -10750,7 +10576,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [uint8x8_t, uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] @@ -10774,7 +10600,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - uint64x1_t @@ -10788,19 +10614,19 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcge_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - '0' - name: "vcge{type[0]}" @@ -10817,12 +10643,12 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcge_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - FnCall: ["vdup_n_{type[1]}", [b]] - - '0' + - - '0' - name: "vcge{neon_type[0].no}" doc: "Floating-point compare greater than or equal" @@ -10830,7 +10656,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -10844,7 +10670,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["d_s64", "i64", "u64", s64] @@ -10863,7 +10689,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - uint64x1_t @@ -10877,7 +10703,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s64", "i64", "u64"] @@ -10896,7 +10722,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [uint64x1_t, u64x1, 'u64x1::new(0)'] @@ -10912,7 +10738,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] @@ -10932,18 +10758,18 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcgez_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vcgez{type[0]}" @@ -10960,11 +10786,11 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vcgez_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vclezd_s64" doc: "Compare less than or equal to zero" @@ -10972,7 +10798,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64"] @@ -10987,7 +10813,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64", 's64'] @@ -11006,7 +10832,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] @@ -11030,7 +10856,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["i64", "u64"] @@ -11047,7 +10873,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] @@ -11064,18 +10890,18 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32", "u32"] - ["d_f64", "f64", "u64"] compose: - FnCall: - - "simd_extract!" + - 'vget_lane_{type[2]}' - - FnCall: - "vcgtz_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vcgtz{type[0]}" doc: "Floating-point compare greater than zero" @@ -11091,11 +10917,11 @@ intrinsics: - ["h_f16", "f16", "u16"] compose: - FnCall: - - "simd_extract!" + - 'vget_lane_{type[2]}' - - FnCall: - "vcgtz_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - '0' + - - '0' - name: "vcvt{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" @@ -11103,7 +10929,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -11121,7 +10947,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, "f64"] @@ -11139,7 +10965,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -11151,7 +10977,7 @@ intrinsics: - - a - FnCall: - "transmute::" - - - FnCall: [simd_extract!, [b, 'LANE as u32']] + - - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]] - name: "vmulq_lane_f64" doc: "Floating-point multiply" @@ -11160,7 +10986,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -11170,7 +10996,7 @@ intrinsics: - FnCall: - simd_mul - - a - - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32; 2]']] + - FnCall: [vdupq_lane_f64, [b], [LANE]] - name: "vmuld_lane_f64" doc: "Floating-point multiply" @@ -11179,14 +11005,14 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - ["f64", float64x1_t] compose: - FnCall: [static_assert!, ['LANE == 0']] - - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}] - Identifier: ['a * b', Symbol] - name: "vmul_laneq_f64" @@ -11196,7 +11022,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -11208,7 +11034,7 @@ intrinsics: - - a - FnCall: - "transmute::" - - - FnCall: [simd_extract!, [b, 'LANE as u32']] + - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]] - name: "vmulq_laneq_f64" doc: "Floating-point multiply" @@ -11217,7 +11043,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -11227,7 +11053,7 @@ intrinsics: - FnCall: - simd_mul - - a - - FnCall: [simd_shuffle!, [b, b, '[LANE as u32; 2]']] + - FnCall: [vdupq_laneq_f64, [b], [LANE]] # vmulq_laneq_f16 @@ -11244,14 +11070,14 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float16x4_t, float16x8_t, '_lane', "[LANE as u32; 4]"] - - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32; 8]"] + - [float16x4_t, float16x8_t, '_lane'] + - [float16x8_t, float16x8_t, 'q_lane'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '3']] - FnCall: - simd_mul - - a - - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - FnCall: ['vdup{neon_type[0].laneq_nox}', [b], [LANE]] - name: "vmul{type[1]}_{type[0]}" @@ -11277,7 +11103,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -11286,7 +11112,7 @@ intrinsics: - ["f64", float64x2_t, "d_laneq_f64", '1'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}] - Identifier: ['a * b', Symbol] @@ -11307,7 +11133,7 @@ intrinsics: - ["f16", float16x8_t, "h_laneq_f16", '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}] - Identifier: ['a * b', Symbol] @@ -11318,7 +11144,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -11333,8 +11159,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]] + - *neon-stable safety: safe types: - [int32x4_t, int16x8_t, "i16"] @@ -11347,8 +11173,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]] + - *neon-stable safety: safe types: - [uint32x4_t, uint16x8_t, "u16"] @@ -11361,46 +11187,46 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] - - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] - - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] - - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] + - [int32x4_t, int16x8_t, int16x4_t, '2'] + - [int32x4_t, int16x8_t, int16x8_t, '3'] + - [int64x2_t, int32x4_t, int32x2_t, '1'] + - [int64x2_t, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmlsl_high_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlsl_high_lane{neon_type[2].no}" doc: "Multiply-subtract long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]'] - - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]'] - - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '2'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmlsl_high_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]] - name: "vclt{neon_type[0].no}" doc: "Floating-point compare less than" @@ -11408,7 +11234,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, uint64x1_t] @@ -11422,19 +11248,19 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["f32", "u32", 's_f32'] - ["f64", "u64", 'd_f64'] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vclt_{type[0]}" - - FnCall: ["vdup_n_{type[0]}", [a]] - FnCall: ["vdup_n_{type[0]}", [b]] - - '0' + - - '0' - name: "vclt{type[2]}" @@ -11451,29 +11277,38 @@ intrinsics: - ["f16", "u16", 'h_f16'] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vclt_{type[0]}" - - FnCall: ["vdup_n_{type[0]}", [a]] - FnCall: ["vdup_n_{type[0]}", [b]] - - '0' + - - '0' - - name: "vabdl_high_{neon_type[0]}" - doc: "Unsigned Absolute difference Long" + - name: "vabdl_high{neon_type[0].noq}" + doc: Unsigned Absolute difference Long arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[1]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]] safety: safe types: - - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]'] - - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]'] + - [uint8x16_t, uint16x8_t] + - [uint16x8_t, uint32x4_t] + - [uint32x4_t, uint64x2_t] compose: - - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]}] - - Let: [d, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] - - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [c, d]]}]] + - Let: + - c + - FnCall: ['vget_high_{neon_type[0]}', [a]] + - Let: + - d + - FnCall: ['vget_high_{neon_type[0]}', [b]] + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d - name: "vfms_n_f64" doc: "Floating-point fused Multiply-subtract to accumulator(vector)" @@ -11481,7 +11316,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, "f64"] @@ -11498,7 +11333,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x2_t, "f64"] @@ -11539,8 +11374,9 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - ['s_f32', float32x2_t, "f32"] - ['qd_f64', float64x2_t, "f64"] @@ -11557,8 +11393,9 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - ['s_f32', float32x2_t, "f32"] - ['qd_f64', float64x2_t, "f64"] @@ -11575,7 +11412,7 @@ intrinsics: return_type: "{type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s64", "i64", "u64"] @@ -11595,7 +11432,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: @@ -11610,14 +11447,14 @@ intrinsics: - - a - FnCall: - "vdup{neon_type[0].N}" - - - FnCall: [simd_extract!, [b, 'LANE as u32']] + - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]] - name: "vqabs{type[2]}" doc: "Signed saturating absolute value" arguments: ["a: {type[0]}"] return_type: "{type[0]}" attr: - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] safety: safe types: @@ -11625,16 +11462,16 @@ intrinsics: - ["i16", "s16", 'h_s16'] compose: - FnCall: - - "simd_extract!" + - 'vget_lane_{type[0]}' - - FnCall: ["vqabs_{type[1]}", [{FnCall: ["vdup_n_{type[1]}", [a]]}]] - - '0' + - - '0' - name: "vqabs{type[1]}" doc: "Signed saturating absolute value" arguments: ["a: {type[0]}"] return_type: "{type[0]}" attr: - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] safety: safe types: @@ -11652,8 +11489,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]] + - *neon-stable safety: safe types: - [int16x8_t, "i16", int32x4_t] @@ -11669,8 +11506,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]] + - *neon-stable safety: safe types: - [uint16x8_t, "u16", uint32x4_t] @@ -11686,44 +11523,44 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]'] - - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]'] - - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]'] - - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]'] + - [int16x8_t, int16x4_t, int32x4_t, '2'] + - [int16x8_t, int16x8_t, int32x4_t, '3'] + - [int32x4_t, int32x2_t, int64x2_t, '1'] + - [int32x4_t, int32x4_t, int64x2_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmull_high_{neon_type[0]}" - - a - - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]] - name: "vmull_high_lane{neon_type[1].no}" doc: "Multiply long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] return_type: "{neon_type[2]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]'] - - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]'] - - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]'] - - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]'] + - [uint16x8_t, uint16x4_t, uint32x4_t, '2'] + - [uint16x8_t, uint16x8_t, uint32x4_t, '3'] + - [uint32x4_t, uint32x2_t, uint64x2_t, '1'] + - [uint32x4_t, uint32x4_t, uint64x2_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmull_high_{neon_type[0]}" - - a - - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]] - name: "vrsqrte{neon_type.no}" doc: "Reciprocal square-root estimate." @@ -11731,7 +11568,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - float64x1_t @@ -11749,7 +11586,7 @@ intrinsics: return_type: "{type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["s_f32", "f32"] @@ -11788,8 +11625,9 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - float32x2_t - float64x2_t @@ -11808,7 +11646,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['1']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -11819,11 +11657,11 @@ intrinsics: compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: - - simd_extract! + - 'vget_lane_{type[2]}' - - FnCall: - "vqshlu_n_{type[4]}::" - - FnCall: ["vdup_n_{type[4]}", [a]] - - '0' + - - '0' - name: "vcvta{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away" @@ -11831,7 +11669,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float32x2_t, uint32x2_t] @@ -11873,7 +11711,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [float64x1_t, int64x1_t] @@ -11896,6 +11734,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -11906,22 +11745,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding towards minus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtmh_{type[3]}_f16(a) as i16' - - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding towards minus infinity" arguments: ["a: {type[0]}"] @@ -11933,6 +11756,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -11943,28 +11767,13 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding towards minus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtmh_{type[3]}_f16(a) as u16' - - name: "vmlal_high_n_{neon_type[1]}" doc: "Multiply-add long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]] + - *neon-stable safety: safe types: - [int32x4_t, int16x8_t, "i16"] @@ -11981,8 +11790,8 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]] + - *neon-stable safety: safe types: - [uint32x4_t, uint16x8_t, "u16"] @@ -11999,38 +11808,38 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] - - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] - - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] - - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] + - [int32x4_t, int16x8_t, int16x4_t, '2'] + - [int32x4_t, int16x8_t, int16x8_t, '3'] + - [int64x2_t, int32x4_t, int32x2_t, '1'] + - [int64x2_t, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]] - name: "vmlal_high_lane{neon_type[2].no}" doc: "Multiply-add long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]'] - - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]'] - - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '2'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]] - name: "vrsrad_n_u64" doc: "Unsigned rounding shift right and accumulate." @@ -12039,7 +11848,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable static_defs: ['const N: i32'] safety: safe types: @@ -12055,7 +11864,7 @@ intrinsics: return_type: "{neon_type}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - uint64x1_t @@ -12069,7 +11878,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -12092,7 +11901,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -12109,7 +11918,7 @@ intrinsics: attr: - FnCall: [target_feature, ['enable = "neon,aes"']] - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -12125,21 +11934,29 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8_t, int8x8x4_t] + - [int8x8_t, 'int8x8x4_t', 'int8x16x2', 'int8x8', 'i8x8::splat(32)'] compose: + - Let: + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] + - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']] - FnCall: - - "vqtbx2" - - - FnCall: [transmute, [a]] - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]] - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]] - - FnCall: [transmute, [c]] + - simd_select + - - FnCall: + - "simd_lt::<{type[3]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[4]}"]] + - FnCall: + - 'vqtbx2{neon_type[0].no}' + - - a + - x + - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]] + - a - name: "vtbx4{neon_type[0].no}" doc: "Extended table look-up" @@ -12147,25 +11964,30 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable safety: safe types: - - [uint8x8_t, uint8x8x4_t, uint8x8_t] - - [poly8x8_t, poly8x8x4_t, uint8x8_t] + - [uint8x8_t, 'uint8x8x4_t', 'uint8x8_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(32)'] + - [poly8x8_t, 'poly8x8x4_t', 'uint8x8_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(32)'] compose: + - Let: + - x + - FnCall: + - '{type[3]}_t' + - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] + - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']] - FnCall: - - transmute - - - FnCall: - - "vqtbx2" - - - FnCall: [transmute, [a]] - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]] - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]] - - c + - simd_select + - - FnCall: + - "simd_lt::<{type[4]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[5]}"]] + - FnCall: + - 'vqtbx2{neon_type[0].no}' + - - a + - x + - c + - a - name: "vtbl1{neon_type[0].no}" doc: "Table look-up" @@ -12173,7 +11995,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}'] @@ -12188,26 +12010,21 @@ intrinsics: - 'unsafe {{ crate::mem::zeroed() }}' - Identifier: ['{type[2]}', Symbol] - - name: "vtbl2{neon_type[1].noq}" + - name: "vtbl2{neon_type[2].no}" doc: "Table look-up" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] - return_type: "{neon_type[1]}" + return_type: "{neon_type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8x2_t, 'int8x8_t'] + - ['int8x8x2_t', 'int8x8_t', 'int8x8_t'] compose: - FnCall: - - vqtbl1 - - - FnCall: - - transmute - - - FnCall: - - 'vcombine{neon_type[1].noq}' - - - 'a.0' - - 'a.1' - - FnCall: [transmute, [b]] + - 'vqtbl1{neon_type[2].noq}' + - - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']] + - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]] - name: "vtbl2{neon_type[2].no}" doc: "Table look-up" @@ -12215,128 +12032,107 @@ intrinsics: return_type: "{neon_type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable safety: safe types: - - [uint8x8x2_t, 'uint8x8_t', 'uint8x8_t'] - - [poly8x8x2_t, 'uint8x8_t', 'poly8x8_t'] + - ['uint8x8x2_t', 'uint8x8_t', 'uint8x8_t'] + - ['poly8x8x2_t', 'uint8x8_t', 'poly8x8_t'] compose: - FnCall: - - transmute - - - FnCall: - - vqtbl1 - - - FnCall: - - transmute - - - FnCall: - - 'vcombine{neon_type[2].noq}' - - - 'a.0' - - 'a.1' - - b + - 'vqtbl1{neon_type[2].noq}' + - - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']] + - b - name: "vtbl3{neon_type[1].no}" doc: "Table look-up" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] - return_type: "{neon_type[1]}" + return_type: "{neon_type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8x3_t, 'int8x8_t', 'int8x16x2'] + - ['int8x8x3_t', 'int8x8_t', 'int8x8_t', 'int8x16x2'] compose: - Let: - - x - - FnCall: - - '{type[2]}_t' - - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']] - - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] + - x + - FnCall: + - '{type[3]}_t' + - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] - FnCall: - - transmute - - - FnCall: - - vqtbl2 - - - FnCall: [transmute, ['x.0']] - - FnCall: [transmute, ['x.1']] - - FnCall: [transmute, [b]] + - 'vqtbl2{neon_type[2].no}' + - - x + - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]] - - name: "vtbl3{neon_type[3].no}" + + - name: "vtbl3{neon_type[2].no}" doc: "Table look-up" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] - return_type: "{neon_type[3]}" + return_type: "{neon_type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t'] - - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t'] - big_endian_inverse: false + - [uint8x8x3_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2'] + - [poly8x8x3_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2'] compose: - Let: - - x - - FnCall: - - '{type[2]}_t' - - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']] - - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] - - FnCall: - - transmute - - - FnCall: - - vqtbl2 - - - FnCall: [transmute, ['x.0']] - - FnCall: [transmute, ['x.1']] - - b + - x + - FnCall: + - '{type[3]}_t' + - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] + - FnCall: + - 'vqtbl2{neon_type[2].no}' + - - x + - b - - name: "vtbl4{neon_type[1].no}" + - name: "vtbl4{neon_type[2].no}" doc: "Table look-up" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] - return_type: "{neon_type[1]}" + return_type: "{neon_type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8x4_t, 'int8x8_t', 'int8x16x2'] + - ['int8x8x4_t', 'int8x8_t', 'int8x8_t', 'int8x16x2'] compose: - Let: - - x - - FnCall: - - '{type[2]}_t' - - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']] - - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']] + - x + - FnCall: + - '{type[3]}_t' + - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']] - FnCall: - - transmute - - - FnCall: - - 'vqtbl2' - - - FnCall: [transmute, ['x.0']] - - FnCall: [transmute, ['x.1']] - - FnCall: [transmute, [b]] + - 'vqtbl2{neon_type[2].no}' + - - x + - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]] - - name: "vtbl4{neon_type[3].no}" + - name: "vtbl4{neon_type[2].no}" doc: "Table look-up" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] - return_type: "{neon_type[3]}" + return_type: "{neon_type[2]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t'] - - [poly8x8x4_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t'] - big_endian_inverse: false + - [uint8x8x4_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2'] + - [poly8x8x4_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2'] compose: - Let: - - x - - FnCall: - - '{type[2]}_t' - - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']] - - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'a.3']] - - FnCall: - - transmute - - - FnCall: - - 'vqtbl2' - - - FnCall: [transmute, ['x.0']] - - FnCall: [transmute, ['x.1']] - - b + - x + - FnCall: + - '{type[3]}_t' + - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']] + - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'a.3']] + - FnCall: + - 'vqtbl2{neon_type[2].no}' + - - x + - b - name: "vqtbx1{neon_type[0].no}" doc: "Extended table look-up" @@ -12344,8 +12140,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1] - [int8x16_t, int8x16_t, uint8x16_t, vqtbx1q] @@ -12358,14 +12155,14 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [uint8x8_t, "uint8x16_t", uint8x8_t, "vqtbx1", "_u8"] - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"] - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"] - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"] - big_endian_inverse: false + big_endian_inverse: true compose: - FnCall: - transmute @@ -12381,29 +12178,48 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"] - - [uint8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"] - - [poly8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"] + - [int8x8_t, "int8x8_t", "int8x8", "i8x8::splat(8)"] compose: - FnCall: - - simd_select - - - FnCall: - - "simd_lt::<{type[4]}_t, int8x8_t>" - - - c - - FnCall: [transmute, ["{type[3]}"]] - - FnCall: - - transmute - - - FnCall: - - "vqtbx1" - - - "transmute(a)" - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].no}", [b, "crate::mem::zeroed()"]] - - "{type[2]}" - - a + - simd_select + - - FnCall: + - "simd_lt::<{type[2]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[3]}"]] + - FnCall: + - 'vqtbx1{neon_type[0].no}' + - - a + - FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']] + - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]] + - a + + - name: "vtbx1{neon_type[0].no}" + doc: "Extended table look-up" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] + - *neon-stable + safety: safe + types: + - [uint8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"] + - [poly8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"] + compose: + - FnCall: + - simd_select + - - FnCall: + - "simd_lt::<{type[2]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[3]}"]] + - FnCall: + - 'vqtbx1{neon_type[0].no}' + - - a + - FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']] + - c + - a - name: "vtbx2{neon_type[0].no}" doc: "Extended table look-up" @@ -12411,18 +12227,23 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8_t, 'int8x8x2_t'] + - [int8x8_t, 'int8x8x2_t', 'int8x8', 'i8x8::splat(16)'] compose: - FnCall: - - vqtbx1 - - - FnCall: [transmute, [a]] - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']] - - FnCall: [transmute, [c]] + - simd_select + - - FnCall: + - "simd_lt::<{type[2]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[3]}"]] + - FnCall: + - 'vqtbx1{neon_type[0].no}' + - - a + - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']] + - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]] + - a - name: "vtbx2{neon_type[0].no}" doc: "Extended table look-up" @@ -12430,22 +12251,24 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable safety: safe types: - - [uint8x8_t, 'uint8x8x2_t', uint8x8_t] - - [poly8x8_t, 'poly8x8x2_t', uint8x8_t] + - [uint8x8_t, 'uint8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)'] + - [poly8x8_t, 'poly8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)'] compose: - FnCall: - - transmute - - - FnCall: - - vqtbx1 - - - FnCall: [transmute, [a]] - - FnCall: - - transmute - - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']] - - c + - simd_select + - - FnCall: + - "simd_lt::<{type[3]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[4]}"]] + - FnCall: + - 'vqtbx1{neon_type[0].no}' + - - a + - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']] + - c + - a - name: "vtbx3{neon_type[0].no}" doc: "Extended table look-up" @@ -12453,34 +12276,29 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8'] + - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'int8x8', 'i8x8::splat(24)'] compose: - Let: - - x - - FnCall: - - '{type[2]}_t' - - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] - - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']] + - x + - FnCall: + - '{type[2]}_t' + - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] + - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']] - FnCall: - - transmute - - - FnCall: - - simd_select - - - FnCall: - - 'simd_lt::<{type[4]}_t, int8x8_t>' - - - FnCall: [transmute, [c]] - - FnCall: [transmute, ['{type[3]}']] - - FnCall: - - transmute - - - FnCall: - - 'vqtbx2' - - - FnCall: [transmute, [a]] - - FnCall: [transmute, ['x.0']] - - FnCall: [transmute, ['x.1']] - - FnCall: [transmute, [c]] - - a + - simd_select + - - FnCall: + - "simd_lt::<{type[3]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[4]}"]] + - FnCall: + - 'vqtbx2{neon_type[0].no}' + - - a + - x + - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]] + - a - name: "vtbx3{neon_type[0].no}" doc: "Extended table look-up" @@ -12488,12 +12306,11 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8'] - - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'u8x8::splat(24)', 'poly8x8'] - big_endian_inverse: false + - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(24)'] + - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(24)'] compose: - Let: - x @@ -12502,22 +12319,17 @@ intrinsics: - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']] - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']] - FnCall: - - transmute - - - FnCall: - - simd_select - - - FnCall: - - 'simd_lt::<{type[4]}_t, int8x8_t>' - - - FnCall: [transmute, [c]] - - FnCall: [transmute, ['{type[3]}']] - - FnCall: - - transmute - - - FnCall: - - 'vqtbx2' - - - FnCall: [transmute, [a]] - - FnCall: [transmute, ['x.0']] - - FnCall: [transmute, ['x.1']] - - c - - a + - simd_select + - - FnCall: + - "simd_lt::<{type[3]}_t, int8x8_t>" + - - c + - FnCall: [transmute, ["{type[4]}"]] + - FnCall: + - 'vqtbx2{neon_type[0].no}' + - - a + - x + - c + - a - name: "vqtbl1{neon_type[3].no}" doc: "Table look-up" @@ -12525,8 +12337,9 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t'] - ['int8x16_t', uint8x16_t, 'vqtbl1q', 'int8x16_t'] @@ -12539,14 +12352,14 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ['uint8x16_t', uint8x8_t, 'vqtbl1', 'uint8x8_t'] - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t'] - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t'] - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t'] - big_endian_inverse: false + big_endian_inverse: true compose: - FnCall: - transmute @@ -12561,8 +12374,9 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t'] - ['int8x16x2_t', uint8x16_t, 'vqtbl2q', 'int8x16_t'] @@ -12575,8 +12389,8 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t'] @@ -12598,8 +12412,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2'] - [int8x16_t, 'int8x16x2_t', uint8x16_t, 'vqtbx2q'] @@ -12612,8 +12427,8 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2'] @@ -12636,8 +12451,8 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3'] @@ -12651,8 +12466,8 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3'] @@ -12675,8 +12490,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3'] - [int8x16_t, 'int8x16x3_t', uint8x16_t, 'vqtbx3q'] @@ -12689,8 +12505,8 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3'] @@ -12714,8 +12530,8 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t'] @@ -12729,8 +12545,8 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t'] @@ -12754,8 +12570,9 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe + big_endian_inverse: true types: - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4'] - [int8x16_t, 'int8x16x4_t', uint8x16_t, 'vqtbx4q'] @@ -12768,8 +12585,8 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable + big_endian_inverse: true safety: safe types: - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4'] @@ -12795,7 +12612,7 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["vqtbl1", "int8x16_t", "uint8x8_t", "int8x8_t"] @@ -12814,7 +12631,7 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - ["vqtbl2", "int8x16_t", "uint8x8_t", "int8x8_t"] @@ -12833,8 +12650,7 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable safety: safe types: - ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t] @@ -12853,8 +12669,7 @@ intrinsics: return_type: "{neon_type[3]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] - big_endian_inverse: false + - *neon-stable safety: safe types: - ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t] @@ -12873,7 +12688,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [vqtbx1, "int8x8_t", "int8x16_t", "uint8x8_t"] @@ -12892,7 +12707,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [vqtbx2, "int8x8_t", "int8x16_t", "uint8x8_t"] @@ -12911,7 +12726,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [vqtbx3, "int8x8_t", "int8x16_t", "uint8x8_t"] @@ -12930,7 +12745,7 @@ intrinsics: return_type: "{neon_type[1]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: safe types: - [vqtbx4, "int8x8_t", "int8x16_t", "uint8x8_t"] @@ -12949,7 +12764,7 @@ intrinsics: attr: - FnCall: [target_feature, ['enable = "{type[2]}"']] - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -13016,7 +12831,7 @@ intrinsics: - FnCall: [target_feature, ['enable = "{type[2]}"']] - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]] - FnCall: [allow, ['clippy::cast_ptr_alignment']] - - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - *neon-stable safety: unsafe: [neon] types: @@ -13218,6 +13033,7 @@ intrinsics: - *neon-stable assert_instr: [addp] safety: safe + big_endian_inverse: true types: - [int8x16_t, "16"] - [int16x8_t, "8"] @@ -13506,6 +13322,7 @@ intrinsics: - *neon-stable assert_instr: ['sminp'] safety: safe + big_endian_inverse: true types: - int8x16_t - int16x8_t @@ -13525,6 +13342,7 @@ intrinsics: - *neon-stable assert_instr: ['uminp'] safety: safe + big_endian_inverse: true types: - uint8x16_t - uint16x8_t @@ -13544,6 +13362,7 @@ intrinsics: - *neon-stable assert_instr: ['fminp'] safety: safe + big_endian_inverse: true types: - float32x4_t - float64x2_t @@ -13562,6 +13381,7 @@ intrinsics: - *neon-stable assert_instr: ['smaxp'] safety: safe + big_endian_inverse: true types: - int8x16_t - int16x8_t @@ -13581,6 +13401,7 @@ intrinsics: - *neon-stable assert_instr: ['umaxp'] safety: safe + big_endian_inverse: true types: - uint8x16_t - uint16x8_t @@ -13600,6 +13421,7 @@ intrinsics: - *neon-stable assert_instr: ['fmaxp'] safety: safe + big_endian_inverse: true types: - float32x4_t - float64x2_t @@ -13754,6 +13576,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlal2] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_high_'] - [float32x4_t, float16x8_t, 'q_high_'] @@ -13765,7 +13588,7 @@ intrinsics: arch: aarch64,arm64ec - - name: "vfmlal{type[3]}{neon_type[1]}" + - name: "vfmlal{type[3]}_high_{neon_type[1]}" doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[0]}" @@ -13779,18 +13602,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] - - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] - - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] - - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] - FnCall: - "vfmlal{type[4]}{neon_type[1]}" - - r - a - - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] - + - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]] - name: "vfmlal{type[2]}{neon_type[1]}" doc: "Floating-point fused Multiply-Add Long to accumulator (vector)." @@ -13803,6 +13625,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlal] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_low_'] - [float32x4_t, float16x8_t, 'q_low_'] @@ -13814,7 +13637,7 @@ intrinsics: arch: aarch64,arm64ec - - name: "vfmlal{type[3]}{neon_type[1]}" + - name: "vfmlal{type[3]}_low_{neon_type[1]}" doc: "Floating-point fused Multiply-Add Long to accumulator (by element)." arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[0]}" @@ -13828,18 +13651,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] - - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] - - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] - - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] - FnCall: - "vfmlal{type[4]}{neon_type[1]}" - - r - a - - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] - + - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]] - name: "vfmlsl{type[2]}{neon_type[1]}" doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." @@ -13852,6 +13674,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlsl2] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_high_'] - [float32x4_t, float16x8_t, 'q_high_'] @@ -13862,7 +13685,7 @@ intrinsics: - link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}" arch: aarch64,arm64ec - - name: "vfmlsl{type[3]}{neon_type[1]}" + - name: "vfmlsl{type[3]}_high_{neon_type[1]}" doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[0]}" @@ -13876,18 +13699,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2'] - - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3'] - - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2'] - - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3'] + - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] - FnCall: - "vfmlsl{type[4]}{neon_type[1]}" - - r - a - - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] - + - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]] - name: "vfmlsl{type[2]}{neon_type[1]}" doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)." @@ -13900,6 +13722,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [fmlsl] safety: safe + big_endian_inverse: true types: - [float32x2_t, float16x4_t, '_low_'] - [float32x4_t, float16x8_t, 'q_low_'] @@ -13910,7 +13733,7 @@ intrinsics: - link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}" arch: aarch64,arm64ec - - name: "vfmlsl{type[3]}{neon_type[1]}" + - name: "vfmlsl{type[3]}_low_{neon_type[1]}" doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)." arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"] return_type: "{neon_type[0]}" @@ -13924,17 +13747,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2'] - - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3'] - - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2'] - - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3'] + - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2'] + - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3'] + - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2'] + - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] - FnCall: - "vfmlsl{type[4]}{neon_type[1]}" - - r - a - - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]] - name: "vamax{neon_type.no}" doc: "Multi-vector floating-point absolute maximum" @@ -14352,3 +14175,49 @@ intrinsics: links: - link: "llvm.aarch64.fjcvtzs" arch: aarch64,arm64ec + + - name: "{type[0]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["a: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-stable + assert_instr: ['{type[3]}'] + safety: safe + big_endian_inverse: true + types: + - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 1)])'] + - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])'] + compose: + - Identifier: ['{type[4]}', UnsafeSymbol] + + - name: "vcombine{neon_type[0].noq}" + doc: Join two smaller vectors into a single larger vector + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + assert_instr: [mov] + safety: safe + big_endian_inverse: true + types: + - [float64x1_t, float64x2_t, '[0, 1]'] + compose: + - FnCall: [simd_shuffle!, [a, b, '{type[2]}']] + + - name: "vgetq_lane_{type[1]}" + doc: "Duplicate vector element to vector or scalar" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + - FnCall: [rustc_legacy_const_generics, ['1']] + assert_instr: [['nop', 'IMM5 = 0']] + safety: safe + big_endian_inverse: true + static_defs: ["const IMM5: i32"] + types: + - ['float64x2_t', 'f64'] + compose: + - FnCall: [static_assert_uimm_bits!, [IMM5, "1"]] + - FnCall: ['simd_extract!', [a, 'IMM5 as u32'], [] , true] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 7b8ddf43742d2..e8682cf45feb8 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -104,6 +104,10 @@ aarch64-crc-stable: &aarch64-crc-stable neon-unstable-f16: &neon-unstable-f16 FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] +# all(target_endian = "little") +all-neon-target-aarch64-arm64ec-little-endian: &all-neon-target-aarch64-arm64ec-little-endian + FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}, 'target_endian = "little"']] + intrinsics: - name: "vand{neon_type.no}" doc: Vector bitwise and @@ -502,11 +506,11 @@ intrinsics: - ['h_f16', 'f16'] compose: - FnCall: - - simd_extract! + - 'vget_lane_{type[1]}' - - FnCall: - "vabs_{type[1]}" - - FnCall: ["vdup_n_{type[1]}", [a]] - - 0 + - - 0 - name: "vcgt{neon_type[0].no}" doc: "Compare signed greater than" @@ -1438,6 +1442,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]'] - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]'] @@ -1462,6 +1467,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]'] - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]'] @@ -1486,6 +1492,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]'] - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]'] @@ -1510,6 +1517,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]'] - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]'] @@ -1537,6 +1545,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]'] - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]'] @@ -1577,6 +1586,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]'] - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]'] @@ -1598,6 +1608,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [_lane_s32, int32x2_t, int32x2_t, '1', '[N as u32, N as u32]'] - [q_lane_s32, int32x2_t, int32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]'] @@ -1622,6 +1633,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] - [_laneq_s32, int32x4_t, int32x2_t, '2', '[N as u32, N as u32]'] @@ -1646,6 +1658,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_laneq_s64, int64x2_t, '1', '[N as u32, N as u32]'] - [q_laneq_u64, uint64x2_t, '1', '[N as u32, N as u32]'] @@ -1666,6 +1679,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - [q_lane_s64, int64x1_t, int64x2_t] - [q_lane_u64, uint64x1_t, uint64x2_t] @@ -1712,13 +1726,13 @@ intrinsics: compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - FnCall: - - "transmute{type[3]}" - - - FnCall: [simd_extract!, [a, 'N as u32']] + - "transmute" + - - FnCall: ['vget{neon_type[1].lane_nox}', [a], [N]] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]] @@ -1728,21 +1742,22 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [int8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] - - [int16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] - - [uint8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] - - [uint16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] - - [poly8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] - - [poly16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] + - int8x8_t + - int16x8_t + - uint8x8_t + - uint16x8_t + - poly8x8_t + - poly16x8_t compose: - FnCall: [static_assert_uimm_bits!, [N, 3]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 15']]}]] @@ -1752,18 +1767,19 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [int8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }'] - - [uint8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }'] - - [poly8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }'] + - int8x16_t + - uint8x16_t + - poly8x16_t compose: - FnCall: [static_assert_uimm_bits!, [N, 4]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7, N as u32 + 8, N as u32 + 9, N as u32 + 10, N as u32 + 11, N as u32 + 12, N as u32 + 13, N as u32 + 14, N as u32 + 15]']] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]] @@ -1773,22 +1789,23 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [int16x4_t,'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] - - [int32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] - - [uint16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] - - [uint32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] - - [poly16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] - - [float32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] + - int16x4_t + - int32x4_t + - uint16x4_t + - uint32x4_t + - poly16x4_t + - float32x4_t compose: - FnCall: [static_assert_uimm_bits!, [N, 2]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]] @@ -1800,16 +1817,17 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [float16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }'] + - float16x4_t compose: - FnCall: [static_assert_uimm_bits!, [N, 2]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]] @@ -1821,18 +1839,19 @@ intrinsics: - *target-not-arm64ec static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [float16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }'] + - float16x8_t compose: - FnCall: [static_assert_uimm_bits!, [N, 3]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 1']]}]] @@ -1842,18 +1861,19 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [int32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] - - [uint32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] - - [float32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] + - int32x2_t + - uint32x2_t + - float32x2_t compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']] - - name: "vext{neon_type[0].no}" + - name: "vext{neon_type.no}" doc: "Extract vector from pair of vectors" - arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] - return_type: "{neon_type[0]}" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]] @@ -1863,12 +1883,13 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const N: i32'] safety: safe + big_endian_inverse: true types: - - [int64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] - - [uint64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] + - int64x2_t + - uint64x2_t compose: - FnCall: [static_assert_uimm_bits!, [N, 1]] - - Identifier: ["{type[1]}", UnsafeSymbol] + - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']] - name: "vmla{neon_type[0].no}" doc: "Multiply-add to accumulator" @@ -1988,17 +2009,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int64x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [int64x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [int32x4_t, int16x4_t, int16x4_t, '2'] + - [int32x4_t, int16x4_t, int16x8_t, '3'] + - [int64x2_t, int32x2_t, int32x2_t, '1'] + - [int64x2_t, int32x2_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmlal_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlal_lane{neon_type[2].no}" doc: "Vector widening multiply accumulate with scalar" @@ -2014,17 +2035,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x4_t, uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x4_t, uint16x4_t, '2'] + - [uint32x4_t, uint16x4_t, uint16x8_t, '3'] + - [uint64x2_t, uint32x2_t, uint32x2_t, '1'] + - [uint64x2_t, uint32x2_t, uint32x4_t, '2'] compose: - - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmlal_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, '{type[5]}']] + - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlal_{neon_type[1]}" doc: "Unsigned multiply-add long" @@ -2143,15 +2164,15 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x4_t, int16x4_t, '2'] + - [int32x4_t, int16x4_t, int16x8_t, '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmlsl_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlsl_lane{neon_type[2].no}" doc: "Vector widening multiply subtract with scalar" @@ -2167,15 +2188,15 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int64x2_t, int32x2_t, int32x2_t, '[LANE as u32, LANE as u32]', '1'] - - [int64x2_t, int32x2_t, int32x4_t, '[LANE as u32, LANE as u32]', '2'] + - [int64x2_t, int32x2_t, int32x2_t, '1'] + - [int64x2_t, int32x2_t, int32x4_t, '2'] compose: - - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmlsl_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlsl_lane{neon_type[2].no}" doc: "Vector widening multiply subtract with scalar" @@ -2191,17 +2212,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x4_t, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x4_t, uint16x4_t, '2'] + - [uint32x4_t, uint16x4_t, uint16x8_t, '3'] + - [uint64x2_t, uint32x2_t, uint32x2_t, '1'] + - [uint64x2_t, uint32x2_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmlsl_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlsl_{neon_type[1]}" doc: "Unsigned multiply-subtract long" @@ -2681,7 +2702,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -2740,7 +2760,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -2978,7 +2997,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -3009,7 +3027,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -3106,7 +3123,6 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: - "const LANE: i32" - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4095,7 +4111,6 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4126,7 +4141,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4490,7 +4504,6 @@ intrinsics: - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable - big_endian_inverse: false safety: unsafe: [neon] types: @@ -4612,7 +4625,6 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] - big_endian_inverse: false safety: unsafe: [neon] types: @@ -6139,10 +6151,11 @@ intrinsics: - *neon-i8mm - *neon-v8 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot]]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [usdot]]}]] - *neon-unstable-i8mm - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int32x2_t, uint8x8_t, int8x8_t] - [int32x4_t, uint8x16_t, int8x16_t] @@ -6155,10 +6168,10 @@ intrinsics: - link: "llvm.arm.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8" arch: arm - - name: "vusdot{type[0]}" + - name: "vusdot{neon_type[0].lane_nox}" doc: "Dot product index form with unsigned and signed integers" - arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"] - return_type: "{neon_type[1]}" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: int8x8_t"] + return_type: "{neon_type[0]}" attr: - *neon-i8mm - *neon-v8 @@ -6168,22 +6181,19 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]',''] - - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] + - [int32x2_t, uint8x8_t, ''] + - [int32x4_t, uint8x16_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - - int32x2_t - - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]] + - FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - - "{type[1]}" - - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32' + - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]] + - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[2]}_s8_s32', [c]]}]] - name: "vsudot{neon_type[0].lane_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6193,30 +6203,27 @@ intrinsics: - *neon-i8mm - *neon-v8 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 0']]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,''] - - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t,'q'] + - [int32x2_t, int8x8_t, uint8x8_t, uint32x2_t, ''] + - [int32x4_t, int8x16_t, uint8x8_t, uint32x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - - uint32x2_t - - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]] + - FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - - "{type[4]}" - - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ['vdup{neon_type[3].lane_nox}', [c], [LANE]] - FnCall: - "vusdot{neon_type[0].no}" - - a - - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] + - FnCall: ['vreinterpret{type[4]}_u8_u32', [c]] - b - name: "vmul{neon_type[1].no}" @@ -6298,20 +6305,20 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x4_t, int16x4_t, '2'] + - [int16x8_t, int16x4_t, '2'] + - [int32x2_t, int32x2_t, '1'] + - [int32x4_t, int32x2_t, '1'] + - [uint16x4_t, uint16x4_t, '2'] + - [uint16x8_t, uint16x4_t, '2'] + - [uint32x2_t, uint32x2_t, '1'] + - [uint32x4_t, uint32x2_t, '1'] compose: - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] - FnCall: - simd_mul - - a - - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]] + - FnCall: ["vdup{neon_type[0].lane_nox}", [b], [LANE]] - name: "vmul{neon_type[0].lane_nox}" @@ -6330,14 +6337,14 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [float16x4_t, float16x4_t, '2'] + - [float16x8_t, float16x4_t, '2'] compose: - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] - FnCall: - simd_mul - - a - - FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]] + - FnCall: ["vdup{neon_type[0].lane_nox}", [v], [LANE]] - name: "vmul{neon_type[0].laneq_nox}" @@ -6354,20 +6361,20 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x4_t, int16x8_t, '3'] + - [int16x8_t, int16x8_t, '3'] + - [int32x2_t, int32x4_t, '2'] + - [int32x4_t, int32x4_t, '2'] + - [uint16x4_t, uint16x8_t, '3'] + - [uint16x8_t, uint16x8_t, '3'] + - [uint32x2_t, uint32x4_t, '2'] + - [uint32x4_t, uint32x4_t, '2'] compose: - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] - FnCall: - simd_mul - - a - - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]] + - FnCall: ["vdup{neon_type[0].laneq_nox}", [b], [LANE]] - name: "vmull{neon_type[1].no}" doc: Signed multiply long @@ -6798,23 +6805,22 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn2]]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [subhn2]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t] + - [int16x4_t, int32x4_t, int16x8_t] + - [int32x2_t, int64x2_t, int32x4_t] + - [uint8x8_t, uint16x8_t, uint8x16_t] + - [uint16x4_t, uint32x4_t, uint16x8_t] + - [uint32x2_t, uint64x2_t, uint32x4_t] compose: - Let: - d - - "{neon_type[0]}" - FnCall: ["vsubhn{neon_type[1].noq}", [b, c]] - - FnCall: [simd_shuffle!, [a, d, "{type[3]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, d]] - name: "vhsub{neon_type[1].no}" doc: "Signed halving subtract" @@ -6968,7 +6974,6 @@ intrinsics: doc: "Dot product index form with unsigned and signed integers" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. attr: - *neon-v8 - *neon-i8mm @@ -6979,14 +6984,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]',''] - - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] + - [int32x2_t, uint8x8_t, int8x16_t, ''] + - [int32x4_t, uint8x16_t, int8x16_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}] - - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] - - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]] - #- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] + - Let: [c, {FnCall: [vreinterpretq_s32_s8, [c]]}] + - Let: [c, {FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]}] + - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]}]] - name: "vsudot{neon_type[0].laneq_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6996,28 +7000,26 @@ intrinsics: - *neon-v8 - *neon-i8mm - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 1']]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] static_defs: ["const LANE: i32"] safety: safe types: - - [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t] - - [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t] + - [int32x2_t, int8x8_t, uint8x16_t, uint32x2_t, ''] + - [int32x4_t, int8x16_t, uint8x16_t, uint32x4_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, 2]] - Let: - c - - uint32x4_t - - FnCall: [transmute, [c]] + - FnCall: [vreinterpretq_u32_u8, [c]] - Let: - c - - "{type[4]}" - - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ['vdup{neon_type[3].laneq_nox}', [c], [LANE]] - FnCall: - "vusdot{neon_type[0].no}" - - a - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[4]}_u8_u32', [c]] - b - name: "vdot{neon_type[0].laneq_nox}" @@ -7025,7 +7027,6 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" static_defs: ["const LANE: i32"] - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. attr: - *neon-v8 - FnCall: [target_feature, ['enable = "neon,dotprod"']] @@ -7035,32 +7036,27 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] safety: safe types: - - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', ''] - - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] + - [int32x2_t, int8x8_t, int8x16_t, ''] + - [int32x4_t, int8x16_t, int8x16_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - Let: - c - - "{neon_type[3]}" - - FnCall: [transmute, [c]] - #- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [vreinterpretq_s32_s8, [c]] - Let: - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]] - FnCall: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] - #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: ['vreinterpret{type[3]}_s8_s32', [c]] - name: "vdot{neon_type[0].laneq_nox}" doc: Dot product arithmetic (indexed) arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] return_type: "{neon_type[0]}" static_defs: ["const LANE: i32"] - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. attr: - *neon-v8 - FnCall: [target_feature, ['enable = "neon,dotprod"']] @@ -7070,25 +7066,21 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] safety: safe types: - - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]',''] - - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] + - [uint32x2_t, uint8x8_t, uint8x16_t, ''] + - [uint32x4_t, uint8x16_t, uint8x16_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - Let: - c - - "{neon_type[3]}" - - FnCall: [transmute, [c]] - #- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: ['vreinterpretq_u32_u8', [c]] - Let: - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]] - FnCall: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] - #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: ['vreinterpret{type[3]}_u8_u32', [c]] - name: "vdot{neon_type[0].no}" doc: Dot product arithmetic (vector) @@ -7102,6 +7094,7 @@ intrinsics: - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int32x2_t, int8x8_t] - [int32x4_t, int8x16_t] @@ -7126,6 +7119,7 @@ intrinsics: - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [uint32x2_t, uint8x8_t] - [uint32x4_t, uint8x16_t] @@ -7151,26 +7145,23 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['3']] - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]',''] - - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] + - [int32x2_t, int8x8_t, int8x8_t, ''] + - [int32x4_t, int8x16_t, int8x8_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]] - FnCall: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[3]}_s8_s32', [c]] - name: "vdot{neon_type[0].lane_nox}" doc: Dot product arithmetic (indexed) @@ -7186,25 +7177,22 @@ intrinsics: - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable safety: safe - big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. types: - - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]',''] - - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] + - [uint32x2_t, uint8x8_t, uint8x8_t, ''] + - [uint32x4_t, uint8x16_t, uint8x8_t, 'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - - "{neon_type[3]}" - - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - - "{neon_type[0]}" - - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]] - FnCall: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: ['vreinterpret{type[3]}_u8_u32', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector) @@ -7505,6 +7493,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - float32x2_t compose: @@ -7529,6 +7518,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - float16x4_t compose: @@ -7598,7 +7588,7 @@ intrinsics: - [int16x4_t, int16x4_t, int32x4_t, '[N as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [N, '2']] - - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}] - FnCall: [vqdmull_s16, [a, b]] - name: "vqdmull_lane_s32" @@ -7615,10 +7605,10 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int32x2_t, int32x2_t, int64x2_t, '[N as u32; 2]'] + - [int32x2_t, int32x2_t, int64x2_t] compose: - FnCall: [static_assert_uimm_bits!, [N, '1']] - - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}] - FnCall: [vqdmull_s32, [a, b]] - name: "vqdmlal{neon_type[1].noq}" @@ -7662,7 +7652,7 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 2]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-not-arm-stable - *neon-cfg-arm-unstable @@ -7681,7 +7671,7 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 1]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-not-arm-stable - *neon-cfg-arm-unstable @@ -7734,7 +7724,7 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 2]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-not-arm-stable - *neon-cfg-arm-unstable @@ -7753,7 +7743,7 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 1]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] - FnCall: [rustc_legacy_const_generics, ['3']] - *neon-not-arm-stable - *neon-cfg-arm-unstable @@ -8500,32 +8490,46 @@ intrinsics: - *neon-cfg-arm-unstable safety: safe types: - - [poly64x1_t, int32x2_t] - - [poly64x1_t, uint32x2_t] - - [poly64x2_t, int32x4_t] - - [poly64x2_t, uint32x4_t] - [p128, int64x2_t] - [p128, uint64x2_t] - [p128, poly64x2_t] - - [poly8x16_t, p128] - [p128, int8x16_t] - [p128, uint8x16_t] - [p128, poly8x16_t] + - [poly64x2_t, p128] + - [p128, int32x4_t] + - [p128, uint32x4_t] + - [p128, int16x8_t] + - [p128, uint16x8_t] + - [p128, poly16x8_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [poly64x1_t, int32x2_t] + - [poly64x1_t, uint32x2_t] + - [poly8x16_t, p128] - [int32x2_t, poly64x1_t] - [uint32x2_t, poly64x1_t] - [int32x4_t, poly64x2_t] - [uint32x4_t, poly64x2_t] - [int64x2_t, p128] - [uint64x2_t, p128] - - [poly64x2_t, p128] - [poly64x1_t, int16x4_t] - [poly64x1_t, uint16x4_t] - [poly64x1_t, poly16x4_t] - - [poly64x2_t, int16x8_t] - - [poly64x2_t, uint16x8_t] - [poly64x2_t, poly16x8_t] - - [p128, int32x4_t] - - [p128, uint32x4_t] - [poly16x4_t, poly64x1_t] - [int16x4_t, poly64x1_t] - [uint16x4_t, poly64x1_t] @@ -8537,12 +8541,7 @@ intrinsics: - [poly64x1_t, int8x8_t] - [poly64x1_t, uint8x8_t] - [poly64x1_t, poly8x8_t] - - [poly64x2_t, int8x16_t] - - [poly64x2_t, uint8x16_t] - [poly64x2_t, poly8x16_t] - - [p128, int16x8_t] - - [p128, uint16x8_t] - - [p128, poly16x8_t] - [poly8x8_t, poly64x1_t] - [int8x8_t, poly64x1_t] - [uint8x8_t, poly64x1_t] @@ -8554,53 +8553,12 @@ intrinsics: - [poly16x8_t, p128] - [int8x16_t, p128] - [uint8x16_t, p128] - compose: - - FnCall: [transmute, [a]] - - - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" - doc: Vector reinterpret cast operation - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - *neon-v7 - - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] - - *neon-not-arm-stable - - *neon-cfg-arm-unstable - safety: safe - types: - - [uint8x8_t, int8x8_t] - - [poly8x8_t, int8x8_t] - - [poly16x4_t, int16x4_t] - - [uint16x4_t, int16x4_t] - - [uint32x2_t, int32x2_t] - - [uint64x1_t, int64x1_t] - - [uint8x16_t, int8x16_t] - - [poly8x16_t, int8x16_t] - - [poly16x8_t, int16x8_t] - - [uint16x8_t, int16x8_t] - - [uint32x4_t, int32x4_t] - - [uint64x2_t, int64x2_t] - - [poly8x8_t, uint8x8_t] - - [int8x8_t, uint8x8_t] - - [poly16x4_t, uint16x4_t] - - [int16x4_t, uint16x4_t] - - [int32x2_t, uint32x2_t] - - [int64x1_t, uint64x1_t] - - [poly8x16_t, uint8x16_t] - - [int8x16_t, uint8x16_t] - - [poly16x8_t, uint16x8_t] - - [int16x8_t, uint16x8_t] - - [int32x4_t, uint32x4_t] - - [int64x2_t, uint64x2_t] - - [int8x8_t, poly8x8_t] - - [uint8x8_t, poly8x8_t] - - [int16x4_t, poly16x4_t] - - [uint16x4_t, poly16x4_t] - - [int8x16_t, poly8x16_t] - - [uint8x16_t, poly8x16_t] - - [int16x8_t, poly16x8_t] - - [uint16x8_t, poly16x8_t] + - [poly64x2_t, int32x4_t] + - [poly64x2_t, uint32x4_t] + - [poly64x2_t, int16x8_t] + - [poly64x2_t, uint16x8_t] + - [poly64x2_t, int8x16_t] + - [poly64x2_t, uint8x16_t] - [int16x4_t, int8x8_t] - [uint16x4_t, int8x8_t] - [poly16x4_t, int8x8_t] @@ -8751,19 +8709,15 @@ intrinsics: - [uint8x16_t, uint64x2_t] - [float32x2_t, int8x8_t] - [float32x2_t, int16x4_t] - - [float32x2_t, int32x2_t] - [float32x2_t, int64x1_t] - [float32x4_t, int8x16_t] - [float32x4_t, int16x8_t] - - [float32x4_t, int32x4_t] - [float32x4_t, int64x2_t] - [float32x2_t, uint8x8_t] - [float32x2_t, uint16x4_t] - - [float32x2_t, uint32x2_t] - [float32x2_t, uint64x1_t] - [float32x4_t, uint8x16_t] - [float32x4_t, uint16x8_t] - - [float32x4_t, uint32x4_t] - [float32x4_t, uint64x2_t] - [float32x2_t, poly8x8_t] - [float32x2_t, poly16x4_t] @@ -8772,19 +8726,15 @@ intrinsics: - [float32x4_t, p128] - [int8x8_t, float32x2_t] - [int16x4_t, float32x2_t] - - [int32x2_t, float32x2_t] - [int64x1_t, float32x2_t] - [int8x16_t, float32x4_t] - [int16x8_t, float32x4_t] - - [int32x4_t, float32x4_t] - [int64x2_t, float32x4_t] - [uint8x8_t, float32x2_t] - [uint16x4_t, float32x2_t] - - [uint32x2_t, float32x2_t] - [uint64x1_t, float32x2_t] - [uint8x16_t, float32x4_t] - [uint16x8_t, float32x4_t] - - [uint32x4_t, float32x4_t] - [uint64x2_t, float32x4_t] - [poly8x8_t, float32x2_t] - [poly16x4_t, float32x2_t] @@ -8794,6 +8744,60 @@ intrinsics: compose: - FnCall: [transmute, [a]] + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable + - *neon-cfg-arm-unstable + safety: safe + types: + - [uint8x8_t, int8x8_t] + - [poly8x8_t, int8x8_t] + - [poly16x4_t, int16x4_t] + - [uint16x4_t, int16x4_t] + - [uint32x2_t, int32x2_t] + - [uint64x1_t, int64x1_t] + - [uint8x16_t, int8x16_t] + - [poly8x16_t, int8x16_t] + - [poly16x8_t, int16x8_t] + - [uint16x8_t, int16x8_t] + - [uint32x4_t, int32x4_t] + - [uint64x2_t, int64x2_t] + - [poly8x8_t, uint8x8_t] + - [int8x8_t, uint8x8_t] + - [poly16x4_t, uint16x4_t] + - [int16x4_t, uint16x4_t] + - [int32x2_t, uint32x2_t] + - [int64x1_t, uint64x1_t] + - [poly8x16_t, uint8x16_t] + - [int8x16_t, uint8x16_t] + - [poly16x8_t, uint16x8_t] + - [int16x8_t, uint16x8_t] + - [int32x4_t, uint32x4_t] + - [int64x2_t, uint64x2_t] + - [int8x8_t, poly8x8_t] + - [uint8x8_t, poly8x8_t] + - [int16x4_t, poly16x4_t] + - [uint16x4_t, poly16x4_t] + - [int8x16_t, poly8x16_t] + - [uint8x16_t, poly8x16_t] + - [int16x8_t, poly16x8_t] + - [uint16x8_t, poly16x8_t] + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [transmute, [a]] - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation @@ -8810,54 +8814,73 @@ intrinsics: types: # non-q - [float32x2_t, float16x4_t] - - [poly16x4_t, float16x4_t] - [poly8x8_t, float16x4_t] - [int8x8_t, float16x4_t] - - [int16x4_t, float16x4_t] - [int32x2_t, float16x4_t] - [int64x1_t, float16x4_t] - [uint8x8_t, float16x4_t] - - [uint16x4_t, float16x4_t] - [uint32x2_t, float16x4_t] - [uint64x1_t, float16x4_t] - [float16x4_t, float32x2_t] - - [float16x4_t, poly16x4_t] - [float16x4_t, poly8x8_t] - [float16x4_t, int8x8_t] - - [float16x4_t, int16x4_t] - [float16x4_t, int32x2_t] - [float16x4_t, int64x1_t] - [float16x4_t, uint8x8_t] - - [float16x4_t, uint16x4_t] - [float16x4_t, uint32x2_t] - [float16x4_t, uint64x1_t] # q - [float32x4_t, float16x8_t] - - [poly16x8_t, float16x8_t] - [poly8x16_t, float16x8_t] - [int8x16_t, float16x8_t] - - [int16x8_t, float16x8_t] - [int32x4_t, float16x8_t] - [int64x2_t, float16x8_t] - [uint8x16_t, float16x8_t] - - [uint16x8_t, float16x8_t] - [uint32x4_t, float16x8_t] - [uint64x2_t, float16x8_t] - [float16x8_t, float32x4_t] - - [float16x8_t, poly16x8_t] - [float16x8_t, poly8x16_t] - [float16x8_t, int8x16_t] - - [float16x8_t, int16x8_t] - [float16x8_t, int32x4_t] - [float16x8_t, int64x2_t] - [float16x8_t, uint8x16_t] - - [float16x8_t, uint16x8_t] - [float16x8_t, uint32x4_t] - [float16x8_t, uint64x2_t] compose: - FnCall: [transmute, [a]] + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-not-arm-stable-fp16 + - *neon-cfg-arm-unstable + - *target-not-arm64ec + safety: safe + types: + # non-q + - [poly16x4_t, float16x4_t] + - [int16x4_t, float16x4_t] + - [uint16x4_t, float16x4_t] + - [float16x4_t, poly16x4_t] + - [float16x4_t, int16x4_t] + - [float16x4_t, uint16x4_t] + # q + - [poly16x8_t, float16x8_t] + - [int16x8_t, float16x8_t] + - [uint16x8_t, float16x8_t] + - [float16x8_t, poly16x8_t] + - [float16x8_t, int16x8_t] + - [float16x8_t, uint16x8_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" doc: Vector reinterpret cast operation arguments: ["a: {type[0]}"] @@ -8894,6 +8917,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, "[3, 2, 1, 0]"] - [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"] @@ -9246,6 +9270,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["i8", int8x8_t, '3'] - ["i16", int16x4_t, '2'] @@ -9286,6 +9311,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["f16", float16x4_t, '2'] - ["f16", float16x8_t, '3'] @@ -9307,6 +9333,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["i64", int64x1_t, int64x1_t] - ["u64", uint64x1_t, uint64x1_t] @@ -9328,6 +9355,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["p64", poly64x1_t, poly64x1_t] compose: @@ -9348,6 +9376,7 @@ intrinsics: - *neon-cfg-arm-unstable static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - ["p64", poly64x2_t, poly64x2_t] compose: @@ -9674,6 +9703,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] - [int16x4_t, int16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] @@ -9718,6 +9748,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] - [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] @@ -9747,6 +9778,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] @@ -9776,6 +9808,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x16_t, int8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] - [int16x8_t, int16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] @@ -9811,6 +9844,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] @@ -9840,6 +9874,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] - [int16x4_t, int16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] @@ -9875,6 +9910,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] - [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] @@ -9903,6 +9939,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [int8x8_t, int8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] - [int16x4_t, int16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] @@ -9947,6 +9984,7 @@ intrinsics: - *neon-cfg-arm-unstable - *target-not-arm64ec safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] - [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] @@ -9976,6 +10014,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] @@ -10909,21 +10948,21 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s16, int16x4_t, int16x4_t, '2'] + - [_laneq_s16, int16x4_t, int16x8_t, '3'] + - [q_lane_s16, int16x8_t, int16x4_t, '2'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2'] + - [_laneq_u16, uint16x4_t, uint16x8_t, '3'] + - [q_lane_u16, uint16x8_t, uint16x4_t, '2'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmla{neon_type[1].no}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup{type[0]}', [c], [LANE]] - name: "vmla{type[0]}" doc: "Vector multiply accumulate with scalar" @@ -10939,21 +10978,21 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, '1'] + - [_laneq_s32, int32x2_t, int32x4_t, '2'] + - [q_lane_s32, int32x4_t, int32x2_t, '1'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1'] + - [_laneq_u32, uint32x2_t, uint32x4_t, '2'] + - [q_lane_u32, uint32x4_t, uint32x2_t, '1'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmla{neon_type[1].no}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup{type[0]}', [c], [LANE]] - name: "vmla{type[0]}" doc: "Vector multiply accumulate with scalar" @@ -10969,17 +11008,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_f32, float32x2_t, float32x2_t, '1'] + - [_laneq_f32, float32x2_t, float32x4_t, '2'] + - [q_lane_f32, float32x4_t, float32x2_t, '1'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmla{neon_type[1].no}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup{type[0]}', [c], [LANE]] - name: "vmls{neon_type[0].N}" doc: "Vector multiply subtract with scalar" @@ -11058,21 +11097,21 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s16, int16x4_t, int16x4_t, '2'] + - [_laneq_s16, int16x4_t, int16x8_t, '3'] + - [q_lane_s16, int16x8_t, int16x4_t, '2'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2'] + - [_laneq_u16, uint16x4_t, uint16x8_t, '3'] + - [q_lane_u16, uint16x8_t, uint16x4_t, '2'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmls{neon_type[1].no}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup{type[0]}', [c], [LANE]] - name: "vmls{type[0]}" doc: "Vector multiply subtract with scalar" @@ -11088,21 +11127,21 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, '1'] + - [_laneq_s32, int32x2_t, int32x4_t, '2'] + - [q_lane_s32, int32x4_t, int32x2_t, '1'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1'] + - [_laneq_u32, uint32x2_t, uint32x4_t, '2'] + - [q_lane_u32, uint32x4_t, uint32x2_t, '1'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmls{neon_type[1].no}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup{type[0]}', [c], [LANE]] - name: "vmls{type[0]}" doc: "Vector multiply subtract with scalar" @@ -11118,17 +11157,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_f32, float32x2_t, float32x2_t, '1'] + - [_laneq_f32, float32x2_t, float32x4_t, '2'] + - [q_lane_f32, float32x4_t, float32x2_t, '1'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmls{neon_type[1].no}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdup{type[0]}', [c], [LANE]] - name: "vmul{neon_type[0].N}" doc: "Vector multiply by scalar" @@ -11213,16 +11252,16 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float32x2_t, float32x2_t, '_lane_f32', '1', '[LANE as u32, LANE as u32]'] - - [float32x2_t, float32x4_t, '_laneq_f32', '2', '[LANE as u32, LANE as u32]'] - - [float32x4_t, float32x2_t, 'q_lane_f32', '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [float32x4_t, float32x4_t, 'q_laneq_f32', '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [float32x2_t, float32x2_t, '_lane_f32', '1'] + - [float32x2_t, float32x4_t, '_laneq_f32', '2'] + - [float32x4_t, float32x2_t, 'q_lane_f32', '1'] + - [float32x4_t, float32x4_t, 'q_laneq_f32', '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - simd_mul - - a - - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + - FnCall: ['vdup{type[2]}', [b], [LANE]] - name: "vqrdmulh{type[0]}" doc: "Vector rounding saturating doubling multiply high by scalar" @@ -11238,17 +11277,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s16, int16x4_t, int16x4_t, '2'] + - [_laneq_s16, int16x4_t, int16x8_t, '3'] + - [q_lane_s16, int16x8_t, int16x4_t, '2'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3'] + - [_lane_s32, int32x2_t, int32x2_t, '1'] + - [_laneq_s32, int32x2_t, int32x4_t, '2'] + - [q_lane_s32, int32x4_t, int32x2_t, '1'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - - Let: [b, "{neon_type[1]}", {FnCall: [simd_shuffle!, [b, b, '{type[4]}']]}] + - Let: [b, {FnCall: ['vdup{type[0]}', [b], [LANE]]}] - FnCall: ["vqrdmulh{neon_type[1].no}", [a, b]] - name: "vqrdmulh{neon_type[0].N}" @@ -11365,16 +11404,16 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int16x4_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int16x4_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x2_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32]'] - - [int32x2_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32]'] + - [int16x4_t, int16x4_t, int32x4_t, '2'] + - [int16x4_t, int16x8_t, int32x4_t, '3'] + - [int32x2_t, int32x2_t, int64x2_t, '1'] + - [int32x2_t, int32x4_t, int64x2_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmull_{neon_type[0]}" - - a - - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + - FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]] - name: "vmull_lane{neon_type[1].no}" doc: "Vector long multiply by scalar" @@ -11390,16 +11429,16 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]'] - - [uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]'] + - [uint16x4_t, uint16x4_t, uint32x4_t, '2'] + - [uint16x4_t, uint16x8_t, uint32x4_t, '3'] + - [uint32x2_t, uint32x2_t, uint64x2_t, '1'] + - [uint32x2_t, uint32x4_t, uint64x2_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmull_{neon_type[0]}" - - a - - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + - FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]] - name: "vfms{neon_type[0].N}" doc: "Floating-point fused Multiply-subtract to accumulator(vector)" @@ -11469,7 +11508,7 @@ intrinsics: - - a - FnCall: - "vdup{neon_type[0].N}" - - - FnCall: [simd_extract!, [b, 'LANE as u32']] + - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]] - name: "vrecpe{neon_type.no}" doc: "Unsigned reciprocal estimate" @@ -12495,6 +12534,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - [float32x2_t, float32x4_t, '[0, 1, 2, 3]'] - [poly8x8_t, poly8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] @@ -12522,6 +12562,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - uint8x16_t compose: @@ -12544,6 +12585,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - uint8x16_t compose: @@ -12566,6 +12608,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint8x16_t, "aesmc"] compose: @@ -12588,6 +12631,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint8x16_t, "aesimc"] compose: @@ -12610,6 +12654,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1h"] compose: @@ -12632,6 +12677,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1c", "uint32x4_t"] compose: @@ -12654,6 +12700,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1m", "uint32x4_t"] compose: @@ -12676,6 +12723,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [u32, "sha1p", "uint32x4_t"] compose: @@ -12698,6 +12746,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha1su0"] compose: @@ -12720,6 +12769,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha1su1"] compose: @@ -12742,6 +12792,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256h"] compose: @@ -12764,6 +12815,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256h2"] compose: @@ -12786,6 +12838,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256su0"] compose: @@ -12808,6 +12861,7 @@ intrinsics: - *neon-cfg-arm-unstable - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]] safety: safe + big_endian_inverse: true types: - [uint32x4_t, "sha256su1"] compose: @@ -13045,6 +13099,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - int8x8_t - int16x4_t @@ -13069,6 +13124,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - uint8x8_t - uint16x4_t @@ -13093,6 +13149,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - float32x2_t compose: @@ -13115,6 +13172,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - int8x8_t - int16x4_t @@ -13139,6 +13197,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - uint8x8_t - uint16x4_t @@ -13163,6 +13222,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - float32x2_t compose: @@ -13228,14 +13288,14 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe types: - - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t, '[0, 1, 2, 3]'] + - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t] compose: - Let: - x @@ -13246,7 +13306,7 @@ intrinsics: - "vraddhn{neon_type[4].noq}" - - FnCall: [transmute, [b]] - FnCall: [transmute, [c]] - - FnCall: ["simd_shuffle!", [a, x, '{type[5]}']] + - FnCall: ['vcombine_{neon_type[0]}', [a, x]] - name: "vraddhn_high{neon_type[1].noq}" doc: "Rounding Add returning High Narrow (high half)." @@ -13255,14 +13315,14 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]] - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe types: - - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64', '[0, 1, 2, 3]'] + - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16'] + - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32'] + - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64'] compose: - Let: - x @@ -13270,7 +13330,7 @@ intrinsics: - "vraddhn{neon_type[1].noq}" - - b - c - - FnCall: ["simd_shuffle!", [a, x, '{type[4]}']] + - FnCall: ['vcombine_{neon_type[0]}', [a, x]] - name: "vpadd{neon_type.no}" doc: "Add pairwise." @@ -13283,6 +13343,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - int8x8_t - int16x4_t @@ -13300,7 +13361,6 @@ intrinsics: doc: "Add pairwise." arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[0]}" - big_endian_inverse: false attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] @@ -14134,6 +14194,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [nop] safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x8_t] compose: @@ -14151,6 +14212,7 @@ intrinsics: - *target-not-arm64ec assert_instr: [nop] safety: safe + big_endian_inverse: true types: - [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"] - [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"] @@ -14171,6 +14233,7 @@ intrinsics: - *target-not-arm64ec static_defs: ['const LANE: i32'] safety: safe + big_endian_inverse: true types: - [float16x4_t, f16, '_lane_f16', '2'] - [float16x8_t, f16, 'q_lane_f16', '3'] @@ -14325,7 +14388,6 @@ intrinsics: doc: "Load one single-element structure and Replicate to all lanes (of one register)." arguments: ["ptr: {type[1]}"] return_type: "{neon_type[2]}" - big_endian_inverse: false attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] } ]] @@ -14481,26 +14543,26 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ] - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe types: - - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2', 'int16x4_t', '[4, 5, 6, 7]'] - - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2', 'int32x2_t', '[2, 3]'] - - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2', 'uint16x4_t', '[4, 5, 6, 7]'] - - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2', 'uint32x2_t', '[2, 3]'] + - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2', 'int8x8_t'] + - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2', 'int16x4_t'] + - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2', 'int32x2_t'] + - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2', 'uint8x8_t'] + - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2', 'uint16x4_t'] + - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2', 'uint32x2_t'] compose: - Let: - a - '{neon_type[5]}' - - FnCall: ['simd_shuffle!', [a, a, '{type[6]}']] + - FnCall: ['vget_high_{neon_type[1]}', [a]] - Let: - b - '{neon_type[5]}' - - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']] + - FnCall: ['vget_high_{neon_type[1]}', [b]] - Let: [a, '{neon_type[2]}', {FnCall: [simd_cast, [a]]}] - Let: [b, '{neon_type[2]}', {FnCall: [simd_cast, [b]]}] - FnCall: [simd_add, [a, b]] @@ -14537,22 +14599,21 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ] - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe types: - - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t', '[4, 5, 6, 7]'] - - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t', '[2, 3]'] - - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t', '[4, 5, 6, 7]'] - - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t', '[2, 3]'] + - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t'] + - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t'] + - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t'] + - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t'] + - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t'] + - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t'] compose: - Let: - b - - '{neon_type[5]}' - - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']] + - FnCall: ['vget_high_{neon_type[2]}', [b]] - Let: - b - '{neon_type[1]}' @@ -14595,17 +14656,17 @@ intrinsics: attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vaddhn']] } ]] - - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['addhn2']]}] ] + - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['addhn2']]}] ] - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe types: - - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)', '[0, 1, 2, 3]'] - - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)', '[0, 1, 2, 3]'] + - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)'] + - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)'] + - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)'] + - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)'] + - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)'] + - ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)'] compose: - Let: - x @@ -14618,7 +14679,7 @@ intrinsics: - - a - b - '{type[4]}' - - FnCall: ['simd_shuffle!', [r, x, '{type[5]}']] + - FnCall: ['vcombine_{neon_type[1]}', [r, x]] - name: "{type[0]}" doc: "Vector narrow integer." @@ -14924,6 +14985,7 @@ intrinsics: arguments: ["v: {neon_type[1]}"] return_type: "{type[2]}" safety: safe + big_endian_inverse: true static_defs: ['const IMM5: i32'] attr: - *neon-v7 @@ -14962,6 +15024,7 @@ intrinsics: arguments: ["v: {neon_type[1]}"] return_type: "{type[2]}" safety: safe + big_endian_inverse: true static_defs: ['const IMM5: i32'] attr: - *neon-v7 @@ -15008,9 +15071,11 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - - ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'int64x1_t([simd_extract!(a, 1)])'] - - ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'uint64x1_t([simd_extract!(a, 1)])'] + - ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'int64x1_t([simd_extract!(a, 1)])'] + - ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'uint64x1_t([simd_extract!(a, 1)])'] + - ['vget_high_p64', 'poly64x2_t', 'poly64x1_t', 'vmov', 'ext', 'transmute(u64x1::new(simd_extract!(a, 1)))'] compose: - Identifier: ['{type[5]}', UnsafeSymbol] @@ -15024,9 +15089,11 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - - ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'int64x1_t([simd_extract!(a, 0)])'] - - ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'uint64x1_t([simd_extract!(a, 0)])'] + - ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'int64x1_t([simd_extract!(a, 0)])'] + - ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'uint64x1_t([simd_extract!(a, 0)])'] + - ['vget_low_p64', 'poly64x2_t', 'poly64x1_t', 'transmute(u64x1::new(simd_extract!(a, 0)))'] compose: - Identifier: ['{type[3]}', UnsafeSymbol] @@ -15041,6 +15108,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vget_high_s8', 'int8x16_t', 'int8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] - ['vget_high_u8', 'uint8x16_t', 'uint8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]'] @@ -15064,6 +15132,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vget_low_s8', 'int8x16_t', 'int8x8_t', '[0, 1, 2, 3, 4, 5, 6, 7]'] - ['vget_low_u8', 'uint8x16_t', 'uint8x8_t','[0, 1, 2, 3, 4, 5, 6, 7]'] @@ -15216,6 +15285,7 @@ intrinsics: - *neon-not-arm-stable - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true types: - ['vrev16_s8', 'int8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]'] - ['vrev16q_s8', 'int8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]'] diff --git a/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs b/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs index 26c156ae178aa..6fba3dc74476e 100644 --- a/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs +++ b/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs @@ -188,7 +188,7 @@ impl FromStr for SuffixKind { "rot90_lane" => Ok(SuffixKind::Rot90Lane), "rot90_laneq" => Ok(SuffixKind::Rot90LaneQ), "rot180" => Ok(SuffixKind::Rot180), - "rot180_lane" => Ok(SuffixKind::Rot180LaneQ), + "rot180_lane" => Ok(SuffixKind::Rot180Lane), "rot180_laneq" => Ok(SuffixKind::Rot180LaneQ), "u" => Ok(SuffixKind::Unsigned), "nox" => Ok(SuffixKind::NoX), diff --git a/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs index dc467fd307810..72fb97fee1f08 100644 --- a/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs +++ b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs @@ -1059,23 +1059,8 @@ impl Intrinsic { /// Add a big endian implementation fn generate_big_endian(&self, variant: &mut Intrinsic) { - /* We can't always blindly reverse the bits only in certain conditions - * do we need a different order - thus this allows us to have the - * ability to do so without having to play codegolf with the yaml AST */ - let should_reverse = { - if let Some(should_reverse) = variant.big_endian_inverse { - should_reverse - } else if variant.compose.len() == 1 { - match &variant.compose[0] { - Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute", - _ => false, - } - } else { - false - } - }; - - if !should_reverse { + // We only reverse if it was specifically requested + if !variant.big_endian_inverse.unwrap_or(false) { return; } diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec index 9a9b1a143eff0..867e071b62edc 100644 --- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec +++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec @@ -228,21 +228,25 @@ asm-fmts = xd, xj, ui6 data-types = V4DI, V4DI, UQI /// lasx_xvbitclr_b +impl = portable name = lasx_xvbitclr_b asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvbitclr_h +impl = portable name = lasx_xvbitclr_h asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvbitclr_w +impl = portable name = lasx_xvbitclr_w asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvbitclr_d +impl = portable name = lasx_xvbitclr_d asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -268,21 +272,25 @@ asm-fmts = xd, xj, ui6 data-types = UV4DI, UV4DI, UQI /// lasx_xvbitset_b +impl = portable name = lasx_xvbitset_b asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvbitset_h +impl = portable name = lasx_xvbitset_h asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvbitset_w +impl = portable name = lasx_xvbitset_w asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvbitset_d +impl = portable name = lasx_xvbitset_d asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -308,21 +316,25 @@ asm-fmts = xd, xj, ui6 data-types = UV4DI, UV4DI, UQI /// lasx_xvbitrev_b +impl = portable name = lasx_xvbitrev_b asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvbitrev_h +impl = portable name = lasx_xvbitrev_h asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvbitrev_w +impl = portable name = lasx_xvbitrev_w asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvbitrev_d +impl = portable name = lasx_xvbitrev_d asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -912,61 +924,73 @@ asm-fmts = xd, xj, ui6 data-types = UV4DI, UV4DI, UQI /// lasx_xvadda_b +impl = portable name = lasx_xvadda_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvadda_h +impl = portable name = lasx_xvadda_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvadda_w +impl = portable name = lasx_xvadda_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvadda_d +impl = portable name = lasx_xvadda_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvsadd_b +impl = portable name = lasx_xvsadd_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvsadd_h +impl = portable name = lasx_xvsadd_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvsadd_w +impl = portable name = lasx_xvsadd_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvsadd_d +impl = portable name = lasx_xvsadd_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvsadd_bu +impl = portable name = lasx_xvsadd_bu asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvsadd_hu +impl = portable name = lasx_xvsadd_hu asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvsadd_wu +impl = portable name = lasx_xvsadd_wu asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvsadd_du +impl = portable name = lasx_xvsadd_du asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -1052,81 +1076,97 @@ asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI /// lasx_xvssub_b +impl = portable name = lasx_xvssub_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvssub_h +impl = portable name = lasx_xvssub_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvssub_w +impl = portable name = lasx_xvssub_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvssub_d +impl = portable name = lasx_xvssub_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvssub_bu +impl = portable name = lasx_xvssub_bu asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvssub_hu +impl = portable name = lasx_xvssub_hu asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvssub_wu +impl = portable name = lasx_xvssub_wu asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvssub_du +impl = portable name = lasx_xvssub_du asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI /// lasx_xvabsd_b +impl = portable name = lasx_xvabsd_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvabsd_h +impl = portable name = lasx_xvabsd_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvabsd_w +impl = portable name = lasx_xvabsd_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvabsd_d +impl = portable name = lasx_xvabsd_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvabsd_bu +impl = portable name = lasx_xvabsd_bu asm-fmts = xd, xj, xk data-types = UV32QI, UV32QI, UV32QI /// lasx_xvabsd_hu +impl = portable name = lasx_xvabsd_hu asm-fmts = xd, xj, xk data-types = UV16HI, UV16HI, UV16HI /// lasx_xvabsd_wu +impl = portable name = lasx_xvabsd_wu asm-fmts = xd, xj, xk data-types = UV8SI, UV8SI, UV8SI /// lasx_xvabsd_du +impl = portable name = lasx_xvabsd_du asm-fmts = xd, xj, xk data-types = UV4DI, UV4DI, UV4DI @@ -1380,41 +1420,49 @@ asm-fmts = xd, xj, ui1 data-types = V4DI, V4DI, UQI /// lasx_xvpickev_b +impl = portable name = lasx_xvpickev_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvpickev_h +impl = portable name = lasx_xvpickev_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvpickev_w +impl = portable name = lasx_xvpickev_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvpickev_d +impl = portable name = lasx_xvpickev_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvpickod_b +impl = portable name = lasx_xvpickod_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvpickod_h +impl = portable name = lasx_xvpickod_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvpickod_w +impl = portable name = lasx_xvpickod_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvpickod_d +impl = portable name = lasx_xvpickod_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec index 8fd267889988e..b9df7bd96b9cb 100644 --- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec +++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec @@ -228,21 +228,25 @@ asm-fmts = vd, vj, ui6 data-types = V2DI, V2DI, UQI /// lsx_vbitclr_b +impl = portable name = lsx_vbitclr_b asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vbitclr_h +impl = portable name = lsx_vbitclr_h asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vbitclr_w +impl = portable name = lsx_vbitclr_w asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vbitclr_d +impl = portable name = lsx_vbitclr_d asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -268,21 +272,25 @@ asm-fmts = vd, vj, ui6 data-types = UV2DI, UV2DI, UQI /// lsx_vbitset_b +impl = portable name = lsx_vbitset_b asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vbitset_h +impl = portable name = lsx_vbitset_h asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vbitset_w +impl = portable name = lsx_vbitset_w asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vbitset_d +impl = portable name = lsx_vbitset_d asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -308,21 +316,25 @@ asm-fmts = vd, vj, ui6 data-types = UV2DI, UV2DI, UQI /// lsx_vbitrev_b +impl = portable name = lsx_vbitrev_b asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vbitrev_h +impl = portable name = lsx_vbitrev_h asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vbitrev_w +impl = portable name = lsx_vbitrev_w asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vbitrev_d +impl = portable name = lsx_vbitrev_d asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -912,61 +924,73 @@ asm-fmts = vd, vj, ui6 data-types = UV2DI, UV2DI, UQI /// lsx_vadda_b +impl = portable name = lsx_vadda_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vadda_h +impl = portable name = lsx_vadda_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vadda_w +impl = portable name = lsx_vadda_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vadda_d +impl = portable name = lsx_vadda_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vsadd_b +impl = portable name = lsx_vsadd_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vsadd_h +impl = portable name = lsx_vsadd_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vsadd_w +impl = portable name = lsx_vsadd_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vsadd_d +impl = portable name = lsx_vsadd_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vsadd_bu +impl = portable name = lsx_vsadd_bu asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vsadd_hu +impl = portable name = lsx_vsadd_hu asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vsadd_wu +impl = portable name = lsx_vsadd_wu asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vsadd_du +impl = portable name = lsx_vsadd_du asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -1052,81 +1076,97 @@ asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI /// lsx_vssub_b +impl = portable name = lsx_vssub_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vssub_h +impl = portable name = lsx_vssub_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vssub_w +impl = portable name = lsx_vssub_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vssub_d +impl = portable name = lsx_vssub_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vssub_bu +impl = portable name = lsx_vssub_bu asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vssub_hu +impl = portable name = lsx_vssub_hu asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vssub_wu +impl = portable name = lsx_vssub_wu asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vssub_du +impl = portable name = lsx_vssub_du asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI /// lsx_vabsd_b +impl = portable name = lsx_vabsd_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vabsd_h +impl = portable name = lsx_vabsd_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vabsd_w +impl = portable name = lsx_vabsd_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vabsd_d +impl = portable name = lsx_vabsd_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vabsd_bu +impl = portable name = lsx_vabsd_bu asm-fmts = vd, vj, vk data-types = UV16QI, UV16QI, UV16QI /// lsx_vabsd_hu +impl = portable name = lsx_vabsd_hu asm-fmts = vd, vj, vk data-types = UV8HI, UV8HI, UV8HI /// lsx_vabsd_wu +impl = portable name = lsx_vabsd_wu asm-fmts = vd, vj, vk data-types = UV4SI, UV4SI, UV4SI /// lsx_vabsd_du +impl = portable name = lsx_vabsd_du asm-fmts = vd, vj, vk data-types = UV2DI, UV2DI, UV2DI @@ -1400,41 +1440,49 @@ asm-fmts = vd, vj, ui1 data-types = V2DI, V2DI, UQI /// lsx_vpickev_b +impl = portable name = lsx_vpickev_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vpickev_h +impl = portable name = lsx_vpickev_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vpickev_w +impl = portable name = lsx_vpickev_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vpickev_d +impl = portable name = lsx_vpickev_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vpickod_b +impl = portable name = lsx_vpickod_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vpickod_h +impl = portable name = lsx_vpickod_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vpickod_w +impl = portable name = lsx_vpickod_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vpickod_d +impl = portable name = lsx_vpickod_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index 2d5e9817a31c8..e07ac41f9c44e 100644 --- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -191,6 +191,54 @@ lsx_vrepli_b lsx_vrepli_h lsx_vrepli_w lsx_vrepli_d +lsx_vbitclr_b +lsx_vbitclr_h +lsx_vbitclr_w +lsx_vbitclr_d +lsx_vbitset_b +lsx_vbitset_h +lsx_vbitset_w +lsx_vbitset_d +lsx_vbitrev_b +lsx_vbitrev_h +lsx_vbitrev_w +lsx_vbitrev_d +lsx_vsadd_b +lsx_vsadd_h +lsx_vsadd_w +lsx_vsadd_d +lsx_vsadd_bu +lsx_vsadd_hu +lsx_vsadd_wu +lsx_vsadd_du +lsx_vssub_b +lsx_vssub_h +lsx_vssub_w +lsx_vssub_d +lsx_vssub_bu +lsx_vssub_hu +lsx_vssub_wu +lsx_vssub_du +lsx_vadda_b +lsx_vadda_h +lsx_vadda_w +lsx_vadda_d +lsx_vabsd_b +lsx_vabsd_h +lsx_vabsd_w +lsx_vabsd_d +lsx_vabsd_bu +lsx_vabsd_hu +lsx_vabsd_wu +lsx_vabsd_du +lsx_vpickev_b +lsx_vpickev_h +lsx_vpickev_w +lsx_vpickev_d +lsx_vpickod_b +lsx_vpickod_h +lsx_vpickod_w +lsx_vpickod_d # LASX intrinsics lasx_xvsll_b @@ -379,3 +427,51 @@ lasx_xvrepli_b lasx_xvrepli_h lasx_xvrepli_w lasx_xvrepli_d +lasx_xvbitclr_b +lasx_xvbitclr_h +lasx_xvbitclr_w +lasx_xvbitclr_d +lasx_xvbitset_b +lasx_xvbitset_h +lasx_xvbitset_w +lasx_xvbitset_d +lasx_xvbitrev_b +lasx_xvbitrev_h +lasx_xvbitrev_w +lasx_xvbitrev_d +lasx_xvsadd_b +lasx_xvsadd_h +lasx_xvsadd_w +lasx_xvsadd_d +lasx_xvsadd_bu +lasx_xvsadd_hu +lasx_xvsadd_wu +lasx_xvsadd_du +lasx_xvssub_b +lasx_xvssub_h +lasx_xvssub_w +lasx_xvssub_d +lasx_xvssub_bu +lasx_xvssub_hu +lasx_xvssub_wu +lasx_xvssub_du +lasx_xvadda_b +lasx_xvadda_h +lasx_xvadda_w +lasx_xvadda_d +lasx_xvabsd_b +lasx_xvabsd_h +lasx_xvabsd_w +lasx_xvabsd_d +lasx_xvabsd_bu +lasx_xvabsd_hu +lasx_xvabsd_wu +lasx_xvabsd_du +lasx_xvpickev_b +lasx_xvpickev_h +lasx_xvpickev_w +lasx_xvpickev_d +lasx_xvpickod_b +lasx_xvpickod_h +lasx_xvpickod_w +lasx_xvpickod_d diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs index 2242bf4264e57..6ba9c17c4804c 100644 --- a/library/stdarch/crates/stdarch-verify/tests/arm.rs +++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs @@ -5145,4 +5145,128 @@ static SKIP_RUNTIME_TESTS: &'static [&'static str] = &[ "vzipq_p16", "__rndr", "__rndrrs", + "vcopy_laneq_f64", + "vcopy_laneq_f64", + "vcopy_laneq_s64", + "vcopy_laneq_s64", + "vcopy_laneq_u64", + "vcopy_laneq_u64", + "vcopy_laneq_p64", + "vcopy_laneq_p64", + "vget_high_f64", + "vget_high_f64", + "vget_high_p64", + "vget_high_p64", + "vget_low_f64", + "vget_low_f64", + "vget_low_p64", + "vget_low_p64", + "vgetq_lane_f64", + "vgetq_lane_f64", + "vaddl_high_s16", + "vaddl_high_s16", + "vaddl_high_s32", + "vaddl_high_s32", + "vaddl_high_s8", + "vaddl_high_s8", + "vaddl_high_u16", + "vaddl_high_u16", + "vaddl_high_u32", + "vaddl_high_u32", + "vaddl_high_u8", + "vaddl_high_u8", + "vget_high_f32", + "vget_high_f32", + "vget_high_p16", + "vget_high_p16", + "vget_high_p8", + "vget_high_p8", + "vget_high_s16", + "vget_high_s16", + "vget_high_s32", + "vget_high_s32", + "vget_high_s8", + "vget_high_s8", + "vget_high_u16", + "vget_high_u16", + "vget_high_u32", + "vget_high_u32", + "vget_high_u8", + "vget_high_u8", + "vget_high_s64", + "vget_high_s64", + "vget_high_u64", + "vget_high_u64", + "vget_lane_f32", + "vget_lane_f32", + "vget_lane_p16", + "vget_lane_p16", + "vget_lane_p8", + "vget_lane_p8", + "vget_lane_s16", + "vget_lane_s16", + "vget_lane_s32", + "vget_lane_s32", + "vget_lane_s8", + "vget_lane_s8", + "vget_lane_u16", + "vget_lane_u16", + "vget_lane_u32", + "vget_lane_u32", + "vget_lane_u8", + "vget_lane_u8", + "vgetq_lane_f32", + "vgetq_lane_f32", + "vgetq_lane_p16", + "vgetq_lane_p16", + "vgetq_lane_p64", + "vgetq_lane_p64", + "vgetq_lane_p8", + "vgetq_lane_p8", + "vgetq_lane_s16", + "vgetq_lane_s16", + "vgetq_lane_s32", + "vgetq_lane_s32", + "vgetq_lane_s64", + "vgetq_lane_s64", + "vgetq_lane_s8", + "vgetq_lane_s8", + "vgetq_lane_u16", + "vgetq_lane_u16", + "vgetq_lane_u32", + "vgetq_lane_u32", + "vgetq_lane_u8", + "vgetq_lane_u8", + "vget_lane_p64", + "vget_lane_s64", + "vget_lane_u64", + "vget_low_f32", + "vget_low_f32", + "vget_low_p16", + "vget_low_p16", + "vget_low_p8", + "vget_low_p8", + "vget_low_s16", + "vget_low_s16", + "vget_low_s32", + "vget_low_s32", + "vget_low_s8", + "vget_low_s8", + "vget_low_u16", + "vget_low_u16", + "vget_low_u32", + "vget_low_u32", + "vget_low_u8", + "vget_low_u8", + "vget_low_s64", + "vget_low_s64", + "vget_low_u64", + "vget_low_u64", + "vaddw_high_s16", + "vaddw_high_s32", + "vaddw_high_s8", + "vaddw_high_u16", + "vaddw_high_u32", + "vaddw_high_u8", + "vgetq_lane_u64", ]; diff --git a/library/stdarch/triagebot.toml b/library/stdarch/triagebot.toml index 621f39b2cbbf3..5b178f0cdf456 100644 --- a/library/stdarch/triagebot.toml +++ b/library/stdarch/triagebot.toml @@ -1,7 +1,7 @@ [assign] [assign.owners] -"*" = ["@Amanieu", "@folkertdev", "@sayantn"] +"*" = ["@Amanieu", "@folkertdev", "@sayantn", "@davidtwco", "@adamgemmell"] [ping.windows] message = """\ diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock index e22fde79a8444..7c890e3f2004c 100644 --- a/src/bootstrap/Cargo.lock +++ b/src/bootstrap/Cargo.lock @@ -742,9 +742,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.39.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9f9fe3d2b7b75cf4f2805e5b9926e8ac47146667b16b86298c4a8bf08cc469" +checksum = "14311e7e9a03114cd4b65eedd54e8fed2945e17f08586ae97ef53bc0669f9581" dependencies = [ "libc", "memchr", diff --git a/src/bootstrap/Cargo.toml b/src/bootstrap/Cargo.toml index 363802093a13a..f4b89e8a28b0f 100644 --- a/src/bootstrap/Cargo.toml +++ b/src/bootstrap/Cargo.toml @@ -57,7 +57,7 @@ walkdir = "2.4" xz2 = "0.1" # Dependencies needed by the build-metrics feature -sysinfo = { version = "0.39.0", default-features = false, optional = true, features = ["system"] } +sysinfo = { version = "0.39.2", default-features = false, optional = true, features = ["system"] } # Dependencies needed by the `tracing` feature chrono = { version = "0.4", default-features = false, optional = true, features = ["now", "std"] } diff --git a/src/doc/rustc/src/check-cfg.md b/src/doc/rustc/src/check-cfg.md index dfe036bf1bb19..ef791b57895ab 100644 --- a/src/doc/rustc/src/check-cfg.md +++ b/src/doc/rustc/src/check-cfg.md @@ -99,7 +99,7 @@ the need to specify them manually. Well known names and values are implicitly added as long as at least one `--check-cfg` argument is present. -As of `2025-01-02T`, the list of known names is as follows: +As of `2026-05-15T`, the list of known names is as follows: @@ -122,15 +122,17 @@ As of `2025-01-02T`, the list of known names is as follows: - `target_endian` - `target_env` - `target_family` + - `target_object_format` - `target_feature` - `target_has_atomic` - - `target_has_atomic_equal_alignment` + - `target_has_atomic_primitive_alignment` - `target_has_atomic_load_store` - `target_os` - `target_pointer_width` - `target_thread_local` - `target_vendor` - `ub_checks` + - `contract_checks` - `unix` - `windows` diff --git a/src/tools/opt-dist/Cargo.toml b/src/tools/opt-dist/Cargo.toml index d80e6caac0885..5893bf87b5c34 100644 --- a/src/tools/opt-dist/Cargo.toml +++ b/src/tools/opt-dist/Cargo.toml @@ -10,7 +10,7 @@ log = "0.4" anyhow = "1" humantime = "2" humansize = "2" -sysinfo = { version = "0.39.0", default-features = false, features = ["disk"] } +sysinfo = { version = "0.39.2", default-features = false, features = ["disk"] } fs_extra = "1" camino = "1" tar = "0.4.45" diff --git a/tests/incremental/lint-unused-features.rs b/tests/incremental/lint-unused-features.rs index 898bb19326018..0bf9730727f74 100644 --- a/tests/incremental/lint-unused-features.rs +++ b/tests/incremental/lint-unused-features.rs @@ -4,7 +4,6 @@ #![deny(unused_features)] // Used language features -#![feature(box_patterns)] #![feature(decl_macro)] #![cfg_attr(all(), feature(rustc_attrs))] @@ -14,11 +13,6 @@ #![cfg_attr(all(), feature(allocator_api))] //[bfail]~^ ERROR feature `allocator_api` is declared but not used -pub fn use_box_patterns(b: Box) -> i32 { - let box x = b; - x -} - macro m() {} pub fn use_decl_macro() { m!(); diff --git a/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs b/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs index f970d395b2cf2..ca33275e3e389 100644 --- a/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs +++ b/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs @@ -1,5 +1,5 @@ #![feature(ascii_char)] // random lib feature -#![feature(box_patterns)] // random lang feature +#![feature(test_unstable_lint)] // random lang feature // picked arbitrary unstable features, just need a random lib and lang feature, ideally ones that // won't be stabilized any time soon so we don't have to update this test diff --git a/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs b/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs index 862b2bd5300c3..61ad07717b658 100644 --- a/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs +++ b/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs @@ -69,7 +69,7 @@ fn test_metrics_dump() { let expected = serde_json::json!( { "lib_features":[{"symbol":"ascii_char", "timestamp":null}], - "lang_features":[{"symbol":"box_patterns","since":null, "timestamp":null}] + "lang_features":[{"symbol":"test_unstable_lint","since":null, "timestamp":null}] } ); diff --git a/tests/run-make/unstable-feature-usage-metrics/lib.rs b/tests/run-make/unstable-feature-usage-metrics/lib.rs index 2202d722c497e..488c00045c454 100644 --- a/tests/run-make/unstable-feature-usage-metrics/lib.rs +++ b/tests/run-make/unstable-feature-usage-metrics/lib.rs @@ -1,5 +1,5 @@ #![feature(ascii_char)] // random lib feature -#![feature(box_patterns)] // random lang feature +#![feature(test_unstable_lint)] // random lang feature // picked arbitrary unstable features, just need a random lib and lang feature, ideally ones that // won't be stabilized any time soon so we don't have to update this test diff --git a/tests/run-make/unstable-feature-usage-metrics/rmake.rs b/tests/run-make/unstable-feature-usage-metrics/rmake.rs index f987829741c72..85ea50e7065ab 100644 --- a/tests/run-make/unstable-feature-usage-metrics/rmake.rs +++ b/tests/run-make/unstable-feature-usage-metrics/rmake.rs @@ -67,7 +67,7 @@ fn test_metrics_dump() { let expected = serde_json::json!( { "lib_features":[{"symbol":"ascii_char", "timestamp":null}], - "lang_features":[{"symbol":"box_patterns","since":null, "timestamp":null}] + "lang_features":[{"symbol":"test_unstable_lint","since":null, "timestamp":null}] } ); diff --git a/tests/ui/asm/naked-functions.rs b/tests/ui/asm/naked-functions.rs index 55f2f552ad31c..abe26ca91e5d8 100644 --- a/tests/ui/asm/naked-functions.rs +++ b/tests/ui/asm/naked-functions.rs @@ -6,7 +6,7 @@ #![feature(asm_unwind, linkage, rustc_attrs, cfg_target_object_format)] #![crate_type = "lib"] -use std::arch::{asm, naked_asm}; +use std::arch::{asm, global_asm, naked_asm}; #[unsafe(naked)] pub extern "C" fn inline_asm_macro() { @@ -14,6 +14,12 @@ pub extern "C" fn inline_asm_macro() { //~^ERROR the `asm!` macro is not allowed in naked functions } +#[unsafe(naked)] +pub extern "C" fn global_asm_macro() { + //~^ERROR naked functions must contain a single `naked_asm!` invocation + global_asm!(""); +} + #[repr(C)] pub struct P { x: u8, diff --git a/tests/ui/asm/naked-functions.stderr b/tests/ui/asm/naked-functions.stderr index 2b67c3aecd73c..e8963f7e1d52c 100644 --- a/tests/ui/asm/naked-functions.stderr +++ b/tests/ui/asm/naked-functions.stderr @@ -1,71 +1,71 @@ error: the `in` operand cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:47:29 + --> $DIR/naked-functions.rs:53:29 | LL | naked_asm!("/* {0} */", in(reg) a) | ^^ the `in` operand is not meaningful for global-scoped inline assembly, remove it error: the `in` operand cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:68:10 + --> $DIR/naked-functions.rs:74:10 | LL | in(reg) a, | ^^ the `in` operand is not meaningful for global-scoped inline assembly, remove it error: the `noreturn` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:88:28 + --> $DIR/naked-functions.rs:94:28 | LL | naked_asm!("", options(noreturn)); | ^^^^^^^^ the `noreturn` option is not meaningful for global-scoped inline assembly error: the `nomem` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:105:28 + --> $DIR/naked-functions.rs:111:28 | LL | naked_asm!("", options(nomem, preserves_flags)); | ^^^^^ the `nomem` option is not meaningful for global-scoped inline assembly error: the `preserves_flags` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:105:35 + --> $DIR/naked-functions.rs:111:35 | LL | naked_asm!("", options(nomem, preserves_flags)); | ^^^^^^^^^^^^^^^ the `preserves_flags` option is not meaningful for global-scoped inline assembly error: the `readonly` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:112:28 + --> $DIR/naked-functions.rs:118:28 | LL | naked_asm!("", options(readonly, nostack), options(pure)); | ^^^^^^^^ the `readonly` option is not meaningful for global-scoped inline assembly error: the `nostack` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:112:38 + --> $DIR/naked-functions.rs:118:38 | LL | naked_asm!("", options(readonly, nostack), options(pure)); | ^^^^^^^ the `nostack` option is not meaningful for global-scoped inline assembly error: the `pure` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:112:56 + --> $DIR/naked-functions.rs:118:56 | LL | naked_asm!("", options(readonly, nostack), options(pure)); | ^^^^ the `pure` option is not meaningful for global-scoped inline assembly error: the `may_unwind` option cannot be used with `naked_asm!` - --> $DIR/naked-functions.rs:120:28 + --> $DIR/naked-functions.rs:126:28 | LL | naked_asm!("", options(may_unwind)); | ^^^^^^^^^^ the `may_unwind` option is not meaningful for global-scoped inline assembly error: this is a user specified error - --> $DIR/naked-functions.rs:151:5 + --> $DIR/naked-functions.rs:157:5 | LL | compile_error!("this is a user specified error") | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: this is a user specified error - --> $DIR/naked-functions.rs:157:5 + --> $DIR/naked-functions.rs:163:5 | LL | compile_error!("this is a user specified error"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: asm template must be a string literal - --> $DIR/naked-functions.rs:164:16 + --> $DIR/naked-functions.rs:170:16 | LL | naked_asm!(invalid_syntax) | ^^^^^^^^^^^^^^ @@ -76,32 +76,38 @@ error[E0787]: the `asm!` macro is not allowed in naked functions LL | unsafe { asm!("", options(raw)) }; | ^^^^^^^^^^^^^^^^^^^^^^ consider using the `naked_asm!` macro instead +error[E0787]: naked functions must contain a single `naked_asm!` invocation + --> $DIR/naked-functions.rs:18:1 + | +LL | pub extern "C" fn global_asm_macro() { + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + error: patterns not allowed in naked function parameters - --> $DIR/naked-functions.rs:25:5 + --> $DIR/naked-functions.rs:31:5 | LL | mut a: u32, | ^^^^^ error: patterns not allowed in naked function parameters - --> $DIR/naked-functions.rs:27:5 + --> $DIR/naked-functions.rs:33:5 | LL | &b: &i32, | ^^ error: patterns not allowed in naked function parameters - --> $DIR/naked-functions.rs:29:6 + --> $DIR/naked-functions.rs:35:6 | LL | (None | Some(_)): Option>, | ^^^^^^^^^^^^^^ error: patterns not allowed in naked function parameters - --> $DIR/naked-functions.rs:31:5 + --> $DIR/naked-functions.rs:37:5 | LL | P { x, y }: P, | ^^^^^^^^^^ error: referencing function parameters is not allowed in naked functions - --> $DIR/naked-functions.rs:40:5 + --> $DIR/naked-functions.rs:46:5 | LL | a + 1 | ^ @@ -109,7 +115,7 @@ LL | a + 1 = help: follow the calling convention in asm block to use parameters error[E0787]: naked functions must contain a single `naked_asm!` invocation - --> $DIR/naked-functions.rs:38:1 + --> $DIR/naked-functions.rs:44:1 | LL | pub extern "C" fn inc(a: u32) -> u32 { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -118,7 +124,7 @@ LL | a + 1 | ----- not allowed in naked functions error[E0787]: naked functions must contain a single `naked_asm!` invocation - --> $DIR/naked-functions.rs:52:1 + --> $DIR/naked-functions.rs:58:1 | LL | pub extern "C" fn inc_closure(a: u32) -> u32 { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -127,7 +133,7 @@ LL | (|| a + 1)() | ------------ not allowed in naked functions error[E0787]: naked functions must contain a single `naked_asm!` invocation - --> $DIR/naked-functions.rs:58:1 + --> $DIR/naked-functions.rs:64:1 | LL | pub extern "C" fn unsupported_operands() { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -144,13 +150,13 @@ LL | let mut e = 0usize; | ------------------- not allowed in naked functions error[E0787]: naked functions must contain a single `naked_asm!` invocation - --> $DIR/naked-functions.rs:80:1 + --> $DIR/naked-functions.rs:86:1 | LL | pub extern "C" fn missing_assembly() { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error[E0787]: naked functions must contain a single `naked_asm!` invocation - --> $DIR/naked-functions.rs:85:1 + --> $DIR/naked-functions.rs:91:1 | LL | pub extern "C" fn too_many_asm_blocks() { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -159,7 +165,7 @@ LL | naked_asm!(""); | -------------- multiple `naked_asm!` invocations are not allowed in naked functions error: referencing function parameters is not allowed in naked functions - --> $DIR/naked-functions.rs:97:11 + --> $DIR/naked-functions.rs:103:11 | LL | *&y | ^ @@ -167,7 +173,7 @@ LL | *&y = help: follow the calling convention in asm block to use parameters error[E0787]: naked functions must contain a single `naked_asm!` invocation - --> $DIR/naked-functions.rs:95:5 + --> $DIR/naked-functions.rs:101:5 | LL | pub extern "C" fn inner(y: usize) -> usize { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -175,6 +181,6 @@ LL | LL | *&y | --- not allowed in naked functions -error: aborting due to 25 previous errors +error: aborting due to 26 previous errors For more information about this error, try `rustc --explain E0787`. diff --git a/tests/ui/asm/statement-global-asm-error.rs b/tests/ui/asm/statement-global-asm-error.rs new file mode 100644 index 0000000000000..f32ba73357a19 --- /dev/null +++ b/tests/ui/asm/statement-global-asm-error.rs @@ -0,0 +1,13 @@ +//@ needs-asm-support + +use std::arch::global_asm; + +fn main() { + let x = 42; + global_asm!("{}", in(x)); + //~^ ERROR the `in` operand cannot be used with `global_asm!` + //~^^ NOTE the `in` operand is not meaningful for global-scoped inline assembly, remove it + + let y = global_asm!(""); + //~^ ERROR non-expression macro in expression position: global_asm +} diff --git a/tests/ui/asm/statement-global-asm-error.stderr b/tests/ui/asm/statement-global-asm-error.stderr new file mode 100644 index 0000000000000..35dde6ac1a781 --- /dev/null +++ b/tests/ui/asm/statement-global-asm-error.stderr @@ -0,0 +1,14 @@ +error: the `in` operand cannot be used with `global_asm!` + --> $DIR/statement-global-asm-error.rs:7:23 + | +LL | global_asm!("{}", in(x)); + | ^^ the `in` operand is not meaningful for global-scoped inline assembly, remove it + +error: non-expression macro in expression position: global_asm + --> $DIR/statement-global-asm-error.rs:11:13 + | +LL | let y = global_asm!(""); + | ^^^^^^^^^^^^^^^ + +error: aborting due to 2 previous errors + diff --git a/tests/ui/asm/statement-global-asm.rs b/tests/ui/asm/statement-global-asm.rs new file mode 100644 index 0000000000000..597495546d955 --- /dev/null +++ b/tests/ui/asm/statement-global-asm.rs @@ -0,0 +1,8 @@ +//@ needs-asm-support +//@ run-pass + +use std::arch::global_asm; + +fn main() { + global_asm!(""); +} diff --git a/tests/ui/binding/match-unique-bind.rs b/tests/ui/binding/match-unique-bind.rs deleted file mode 100644 index 02f1945f5718f..0000000000000 --- a/tests/ui/binding/match-unique-bind.rs +++ /dev/null @@ -1,11 +0,0 @@ -//@ run-pass -#![feature(box_patterns)] - -pub fn main() { - match Box::new(100) { - box x => { - println!("{}", x); - assert_eq!(x, 100); - } - } -} diff --git a/tests/ui/box/unit/unique-destructure.rs b/tests/ui/box/unit/unique-destructure.rs deleted file mode 100644 index 2ddb3c452cd59..0000000000000 --- a/tests/ui/box/unit/unique-destructure.rs +++ /dev/null @@ -1,9 +0,0 @@ -//@ run-pass -#![feature(box_patterns)] - -struct Foo { a: isize, b: isize } - -pub fn main() { - let box Foo{ a, b } = Box::new(Foo { a: 100, b: 200 }); - assert_eq!(a + b, 300); -} diff --git a/tests/ui/box/unit/unique-pat-2.rs b/tests/ui/box/unit/unique-pat-2.rs deleted file mode 100644 index 85f0fbd5e4f13..0000000000000 --- a/tests/ui/box/unit/unique-pat-2.rs +++ /dev/null @@ -1,18 +0,0 @@ -//@ run-pass -#![allow(dead_code)] -#![allow(non_camel_case_types)] -#![allow(non_shorthand_field_patterns)] - -#![feature(box_patterns)] - -struct Foo {a: isize, b: usize} - -enum bar { u(Box), w(isize), } - -pub fn main() { - let v = match bar::u(Box::new(Foo{ a: 10, b: 40 })) { - bar::u(box Foo{ a: a, b: b }) => { a + (b as isize) } - _ => { 66 } - }; - assert_eq!(v, 50); -} diff --git a/tests/ui/cfg/disallowed-cli-cfgs.rs b/tests/ui/cfg/disallowed-cli-cfgs.rs index 1ce65a7d657e6..81f3d9313ef1f 100644 --- a/tests/ui/cfg/disallowed-cli-cfgs.rs +++ b/tests/ui/cfg/disallowed-cli-cfgs.rs @@ -1,10 +1,12 @@ +// ignore-tidy-linelength (target_has_atomic_primitive_alignment below overflows the linelength limit and @ [revision]compile-flags isn't detected by tidy as something to ignore) + //@ check-fail //@ revisions: overflow_checks_ debug_assertions_ ub_checks_ sanitize_ //@ revisions: sanitizer_cfi_generalize_pointers_ sanitizer_cfi_normalize_integers_ //@ revisions: proc_macro_ panic_ target_feature_ unix_ windows_ target_abi_ //@ revisions: target_arch_ target_endian_ target_env_ target_family_ target_os_ //@ revisions: target_object_format_ target_pointer_width_ target_vendor_ -//@ revisions: target_has_atomic_ target_has_atomic_equal_alignment_ +//@ revisions: target_has_atomic_ target_has_atomic_primitive_alignment_ //@ revisions: target_has_atomic_load_store_ target_thread_local_ relocation_model_ //@ revisions: fmt_debug_ //@ revisions: emscripten_wasm_eh_ @@ -31,7 +33,7 @@ //@ [target_pointer_width_]compile-flags: --cfg target_pointer_width="32" //@ [target_vendor_]compile-flags: --cfg target_vendor //@ [target_has_atomic_]compile-flags: --cfg target_has_atomic="32" -//@ [target_has_atomic_equal_alignment_]compile-flags: --cfg target_has_atomic_equal_alignment="32" +//@ [target_has_atomic_primitive_alignment_]compile-flags: --cfg target_has_atomic_primitive_alignment="32" //@ [target_has_atomic_load_store_]compile-flags: --cfg target_has_atomic_load_store="32" //@ [target_thread_local_]compile-flags: --cfg target_thread_local //@ [relocation_model_]compile-flags: --cfg relocation_model="a" diff --git a/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr index 096490a03f610..3cdc06822700c 100644 --- a/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr +++ b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr @@ -1,6 +1,6 @@ -error: unexpected `--cfg target_has_atomic_equal_alignment="32"` flag +error: unexpected `--cfg target_has_atomic_primitive_alignment="32"` flag | - = note: config `target_has_atomic_equal_alignment` is only supposed to be controlled by `--target` + = note: config `target_has_atomic_primitive_alignment` is only supposed to be controlled by `--target` = note: manually setting a built-in cfg can and does create incoherent behaviors = note: `#[deny(explicit_builtin_cfgs_in_flags)]` on by default diff --git a/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr new file mode 100644 index 0000000000000..3cdc06822700c --- /dev/null +++ b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr @@ -0,0 +1,8 @@ +error: unexpected `--cfg target_has_atomic_primitive_alignment="32"` flag + | + = note: config `target_has_atomic_primitive_alignment` is only supposed to be controlled by `--target` + = note: manually setting a built-in cfg can and does create incoherent behaviors + = note: `#[deny(explicit_builtin_cfgs_in_flags)]` on by default + +error: aborting due to 1 previous error + diff --git a/tests/ui/check-cfg/well-known-names.stderr b/tests/ui/check-cfg/well-known-names.stderr index d946377f2616b..ec1345fd6b3cd 100644 --- a/tests/ui/check-cfg/well-known-names.stderr +++ b/tests/ui/check-cfg/well-known-names.stderr @@ -26,8 +26,8 @@ LL | #[cfg(list_all_well_known_cfgs)] `target_family` `target_feature` `target_has_atomic` -`target_has_atomic_equal_alignment` `target_has_atomic_load_store` +`target_has_atomic_primitive_alignment` `target_object_format` `target_os` `target_pointer_width` diff --git a/tests/ui/check-cfg/well-known-values.rs b/tests/ui/check-cfg/well-known-values.rs index f48438d142467..5f961989d720f 100644 --- a/tests/ui/check-cfg/well-known-values.rs +++ b/tests/ui/check-cfg/well-known-values.rs @@ -13,7 +13,6 @@ #![feature(cfg_relocation_model)] #![feature(cfg_sanitize)] #![feature(cfg_target_has_atomic)] -#![feature(cfg_target_has_atomic_equal_alignment)] #![feature(cfg_target_thread_local)] #![feature(cfg_target_object_format)] #![feature(cfg_ub_checks)] @@ -65,10 +64,10 @@ // ^ tested in target_feature.rs target_has_atomic = "_UNEXPECTED_VALUE", //~^ WARN unexpected `cfg` condition value - target_has_atomic_equal_alignment = "_UNEXPECTED_VALUE", - //~^ WARN unexpected `cfg` condition value target_has_atomic_load_store = "_UNEXPECTED_VALUE", //~^ WARN unexpected `cfg` condition value + target_has_atomic_primitive_alignment = "_UNEXPECTED_VALUE", + //~^ WARN unexpected `cfg` condition value target_object_format = "_UNEXPECTED_VALUE", //~^ WARN unexpected `cfg` condition value target_os = "_UNEXPECTED_VALUE", diff --git a/tests/ui/check-cfg/well-known-values.stderr b/tests/ui/check-cfg/well-known-values.stderr index dd1b696b76cb2..47ed9891a1a6c 100644 --- a/tests/ui/check-cfg/well-known-values.stderr +++ b/tests/ui/check-cfg/well-known-values.stderr @@ -1,5 +1,5 @@ warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:30:5 + --> $DIR/well-known-values.rs:29:5 | LL | clippy = "_UNEXPECTED_VALUE", | ^^^^^^---------------------- @@ -11,7 +11,7 @@ LL | clippy = "_UNEXPECTED_VALUE", = note: `#[warn(unexpected_cfgs)]` on by default warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:32:5 + --> $DIR/well-known-values.rs:31:5 | LL | debug_assertions = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^---------------------- @@ -22,7 +22,7 @@ LL | debug_assertions = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:34:5 + --> $DIR/well-known-values.rs:33:5 | LL | doc = "_UNEXPECTED_VALUE", | ^^^---------------------- @@ -33,7 +33,7 @@ LL | doc = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:36:5 + --> $DIR/well-known-values.rs:35:5 | LL | doctest = "_UNEXPECTED_VALUE", | ^^^^^^^---------------------- @@ -44,7 +44,7 @@ LL | doctest = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:38:5 + --> $DIR/well-known-values.rs:37:5 | LL | fmt_debug = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -53,7 +53,7 @@ LL | fmt_debug = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:40:5 + --> $DIR/well-known-values.rs:39:5 | LL | miri = "_UNEXPECTED_VALUE", | ^^^^---------------------- @@ -64,7 +64,7 @@ LL | miri = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:42:5 + --> $DIR/well-known-values.rs:41:5 | LL | overflow_checks = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^---------------------- @@ -75,7 +75,7 @@ LL | overflow_checks = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:44:5 + --> $DIR/well-known-values.rs:43:5 | LL | panic = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -84,7 +84,7 @@ LL | panic = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:46:5 + --> $DIR/well-known-values.rs:45:5 | LL | proc_macro = "_UNEXPECTED_VALUE", | ^^^^^^^^^^---------------------- @@ -95,7 +95,7 @@ LL | proc_macro = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:48:5 + --> $DIR/well-known-values.rs:47:5 | LL | relocation_model = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ LL | relocation_model = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:50:5 + --> $DIR/well-known-values.rs:49:5 | LL | rustfmt = "_UNEXPECTED_VALUE", | ^^^^^^^---------------------- @@ -115,7 +115,7 @@ LL | rustfmt = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:52:5 + --> $DIR/well-known-values.rs:51:5 | LL | sanitize = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -124,7 +124,7 @@ LL | sanitize = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:54:5 + --> $DIR/well-known-values.rs:53:5 | LL | target_abi = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -133,7 +133,7 @@ LL | target_abi = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:56:5 + --> $DIR/well-known-values.rs:55:5 | LL | target_arch = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -142,7 +142,7 @@ LL | target_arch = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:58:5 + --> $DIR/well-known-values.rs:57:5 | LL | target_endian = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -151,7 +151,7 @@ LL | target_endian = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:60:5 + --> $DIR/well-known-values.rs:59:5 | LL | target_env = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -160,7 +160,7 @@ LL | target_env = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:62:5 + --> $DIR/well-known-values.rs:61:5 | LL | target_family = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -169,7 +169,7 @@ LL | target_family = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:66:5 + --> $DIR/well-known-values.rs:65:5 | LL | target_has_atomic = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -178,25 +178,25 @@ LL | target_has_atomic = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:68:5 + --> $DIR/well-known-values.rs:67:5 | -LL | target_has_atomic_equal_alignment = "_UNEXPECTED_VALUE", - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | target_has_atomic_load_store = "_UNEXPECTED_VALUE", + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | - = note: expected values for `target_has_atomic_equal_alignment` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr` + = note: expected values for `target_has_atomic_load_store` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr` = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:70:5 + --> $DIR/well-known-values.rs:69:5 | -LL | target_has_atomic_load_store = "_UNEXPECTED_VALUE", - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LL | target_has_atomic_primitive_alignment = "_UNEXPECTED_VALUE", + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | - = note: expected values for `target_has_atomic_load_store` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr` + = note: expected values for `target_has_atomic_primitive_alignment` are: `128`, `16`, `32`, `64`, `8`, and `ptr` = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:72:5 + --> $DIR/well-known-values.rs:71:5 | LL | target_object_format = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -205,7 +205,7 @@ LL | target_object_format = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:74:5 + --> $DIR/well-known-values.rs:73:5 | LL | target_os = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -214,7 +214,7 @@ LL | target_os = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:76:5 + --> $DIR/well-known-values.rs:75:5 | LL | target_pointer_width = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -223,7 +223,7 @@ LL | target_pointer_width = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:78:5 + --> $DIR/well-known-values.rs:77:5 | LL | target_thread_local = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^---------------------- @@ -234,7 +234,7 @@ LL | target_thread_local = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:80:5 + --> $DIR/well-known-values.rs:79:5 | LL | target_vendor = "_UNEXPECTED_VALUE", | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -243,7 +243,7 @@ LL | target_vendor = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:82:5 + --> $DIR/well-known-values.rs:81:5 | LL | ub_checks = "_UNEXPECTED_VALUE", | ^^^^^^^^^---------------------- @@ -254,7 +254,7 @@ LL | ub_checks = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:84:5 + --> $DIR/well-known-values.rs:83:5 | LL | unix = "_UNEXPECTED_VALUE", | ^^^^---------------------- @@ -265,7 +265,7 @@ LL | unix = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE` - --> $DIR/well-known-values.rs:86:5 + --> $DIR/well-known-values.rs:85:5 | LL | windows = "_UNEXPECTED_VALUE", | ^^^^^^^---------------------- @@ -276,7 +276,7 @@ LL | windows = "_UNEXPECTED_VALUE", = note: see for more information about checking conditional configuration warning: unexpected `cfg` condition value: `linuz` - --> $DIR/well-known-values.rs:92:7 + --> $DIR/well-known-values.rs:91:7 | LL | #[cfg(target_os = "linuz")] // testing that we suggest `linux` | ^^^^^^^^^^^^------- diff --git a/tests/ui/deref/derefmut-closure-drop-order.rs b/tests/ui/deref/derefmut-closure-drop-order.rs deleted file mode 100644 index b95ae68f1734f..0000000000000 --- a/tests/ui/deref/derefmut-closure-drop-order.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Regression test for https://github.com/rust-lang/rust/issues/16774 - -//@ run-pass -#![feature(box_patterns)] - -use std::ops::{Deref, DerefMut}; - -struct X(Box); - -static mut DESTRUCTOR_RAN: bool = false; - -impl Drop for X { - fn drop(&mut self) { - unsafe { - assert!(!DESTRUCTOR_RAN); - DESTRUCTOR_RAN = true; - } - } -} - -impl Deref for X { - type Target = isize; - - fn deref(&self) -> &isize { - let &X(box ref x) = self; - x - } -} - -impl DerefMut for X { - fn deref_mut(&mut self) -> &mut isize { - let &mut X(box ref mut x) = self; - x - } -} - -fn main() { - { - let mut test = X(Box::new(5)); - { - let mut change = || { *test = 10 }; - change(); - } - assert_eq!(*test, 10); - } - assert!(unsafe { DESTRUCTOR_RAN }); -} diff --git a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs b/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs deleted file mode 100644 index 3d692a0700197..0000000000000 --- a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs +++ /dev/null @@ -1,14 +0,0 @@ -fn main() { - cfg!(target_has_atomic_equal_alignment = "8"); - //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - cfg!(target_has_atomic_equal_alignment = "16"); - //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - cfg!(target_has_atomic_equal_alignment = "32"); - //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - cfg!(target_has_atomic_equal_alignment = "64"); - //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - cfg!(target_has_atomic_equal_alignment = "128"); - //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - cfg!(target_has_atomic_equal_alignment = "ptr"); - //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change -} diff --git a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr b/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr deleted file mode 100644 index 8d5d232ccc485..0000000000000 --- a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr +++ /dev/null @@ -1,63 +0,0 @@ -error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:2:10 - | -LL | cfg!(target_has_atomic_equal_alignment = "8"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #93822 for more information - = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:4:10 - | -LL | cfg!(target_has_atomic_equal_alignment = "16"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #93822 for more information - = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:6:10 - | -LL | cfg!(target_has_atomic_equal_alignment = "32"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #93822 for more information - = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:8:10 - | -LL | cfg!(target_has_atomic_equal_alignment = "64"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #93822 for more information - = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:10:10 - | -LL | cfg!(target_has_atomic_equal_alignment = "128"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #93822 for more information - = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change - --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:12:10 - | -LL | cfg!(target_has_atomic_equal_alignment = "ptr"); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: see issue #93822 for more information - = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 6 previous errors - -For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/lint/unused-features/used-language-features.rs b/tests/ui/lint/unused-features/used-language-features.rs index 7da4866a00d15..ddc40ce2e96b9 100644 --- a/tests/ui/lint/unused-features/used-language-features.rs +++ b/tests/ui/lint/unused-features/used-language-features.rs @@ -4,15 +4,9 @@ #![deny(unused_features)] // Used language features -#![feature(box_patterns)] #![feature(decl_macro)] #![cfg_attr(all(), feature(rustc_attrs))] -pub fn use_box_patterns(b: Box) -> i32 { - let box x = b; - x -} - macro m() {} pub fn use_decl_macro() { m!(); diff --git a/tests/ui/parser/break-in-unlabeled-block-parenthesized.rs b/tests/ui/parser/break-in-unlabeled-block-parenthesized.rs new file mode 100644 index 0000000000000..787aaf0c18450 --- /dev/null +++ b/tests/ui/parser/break-in-unlabeled-block-parenthesized.rs @@ -0,0 +1,9 @@ +#![allow(unused_parens)] +fn main() { + { + (break); //~ ERROR `break` outside of a loop or labeled block + }; + { + ((break)); //~ ERROR `break` outside of a loop or labeled block + }; +} diff --git a/tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr b/tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr new file mode 100644 index 0000000000000..17d3c977ffc90 --- /dev/null +++ b/tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr @@ -0,0 +1,27 @@ +error[E0268]: `break` outside of a loop or labeled block + --> $DIR/break-in-unlabeled-block-parenthesized.rs:4:9 + | +LL | (break); + | ^^^^^^^ cannot `break` outside of a loop or labeled block + | +help: consider labeling this block to be able to break within it + | +LL ~ 'block: { +LL ~ (break 'block); + | + +error[E0268]: `break` outside of a loop or labeled block + --> $DIR/break-in-unlabeled-block-parenthesized.rs:7:9 + | +LL | ((break)); + | ^^^^^^^^^ cannot `break` outside of a loop or labeled block + | +help: consider labeling this block to be able to break within it + | +LL ~ 'block: { +LL ~ ((break 'block)); + | + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0268`. diff --git a/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs b/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs index b7c7f0ed98241..e8aad9cbeb9fa 100644 --- a/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs +++ b/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs @@ -1,11 +1,11 @@ -//! This test exercises the combined effect of the `cfg(target_has_atomic_equal_alignment = "...")` -//! implementation in the compiler plus usage of said `cfg(target_has_atomic_equal_alignment)` in -//! `core` for the `Atomic64::from_mut` API. +//! This test exercises the combined effect of the +//! `cfg(target_has_atomic_primitive_alignment = "...")` implementation in the compiler plus usage +//! of said `cfg(target_has_atomic_primitive_alignment)` in `core` for the `Atomic64::from_mut` API. //! //! This test is a basic smoke test: that `AtomicU64::from_mut` is gated by -//! `#[cfg(target_has_atomic_equal_alignment = "8")]`, which is only available on platforms where -//! `AtomicU64` has the same alignment as `u64`. This is notably *not* satisfied by `x86_32`, where -//! they have differing alignments. Thus, `AtomicU64::from_mut` should *not* be available on +//! `#[cfg(target_has_atomic_primitive_alignment = "64")]`, which is only available on platforms +//! where `AtomicU64` has the same alignment as `u64`. This is notably *not* satisfied by `x86_32`, +//! where they have differing alignments. Thus, `AtomicU64::from_mut` should *not* be available on //! `x86_32` linux and should report assoc item not found, if the `cfg` is working correctly. //! Conversely, `AtomicU64::from_mut` *should* be available on `x86_64` linux where the alignment //! matches.