diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index def2fd682b..6a2b62cf30 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -266,23 +266,22 @@ macro_rules! deinterleaving_load { transmute((v0, v1)) }}; + // N = 3 ($elem:ty, $lanes:literal, 3, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; - use $crate::mem::{MaybeUninit, transmute}; + use $crate::mem::transmute; type V = Simd<$elem, $lanes>; - type W = Simd<$elem, { $lanes * 3 }>; + type Arr = [$elem; { $lanes * 3 }]; + // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), + // passing a pointer that is not sufficiently aligned is an UB. + let arr: Arr = $crate::ptr::read_unaligned($ptr as *const [$elem; { $lanes * 3 }]); // NOTE: repr(simd) adds padding to make the total size a power of two. - // Hence reading W from ptr might read out of bounds. - let mut mem = MaybeUninit::::uninit(); - $crate::ptr::copy_nonoverlapping( - $ptr.cast::<$elem>(), - mem.as_mut_ptr().cast::<$elem>(), - $lanes * 3, - ); - let w = mem.assume_init(); + // Hence reading a W from ptr might read out of bounds. + type W = Simd<$elem, { $lanes * 3 }>; + let w: W = W::from_array(arr); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>()); let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>()); @@ -291,6 +290,7 @@ macro_rules! deinterleaving_load { transmute((v0, v1, v2)) }}; + // N = 4 ($elem:ty, $lanes:literal, 4, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; @@ -345,16 +345,17 @@ macro_rules! interleaving_store { let v2v2: Simd<$elem, { $lanes * 2 }> = simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>()); - type W = Simd<$elem, { $lanes * 3 }>; - // NOTE: repr(simd) adds padding to make the total size a power of two. // Hence writing W to ptr might write out of bounds. + type W = Simd<$elem, { $lanes * 3 }>; + let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>()); - $crate::ptr::copy_nonoverlapping( - (&w as *const W).cast::<$elem>(), - $ptr.cast::<$elem>(), - $lanes * 3, - ); + + let arr: [$elem; { $lanes * 3 }] = $crate::mem::transmute_copy(&w); + + // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), + // passing a pointer that is not sufficiently aligned is an UB. + $ptr.cast::<[$elem; { $lanes * 3 }]>().write_unaligned(arr); }}; // N = 4