From 281d640bfa9740feeb5ab00207e5a75f3795bcbd Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Sat, 30 May 2026 10:41:38 +0100 Subject: [PATCH 1/4] Removes alignment requirement in st3 --- crates/core_arch/src/macros.rs | 45 ++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index def2fd682b..3b931e661d 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -266,23 +266,25 @@ macro_rules! deinterleaving_load { transmute((v0, v1)) }}; + // N = 3 ($elem:ty, $lanes:literal, 3, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; - use $crate::mem::{MaybeUninit, transmute}; + use $crate::mem::transmute; + // NOTE: repr(simd) adds padding to make the total size a power of two. + // Hence writing W to ptr might write out of bounds. type V = Simd<$elem, $lanes>; - type W = Simd<$elem, { $lanes * 3 }>; + type Arr = [$elem; { $lanes * 3 }]; - // NOTE: repr(simd) adds padding to make the total size a power of two. - // Hence reading W from ptr might read out of bounds. - let mut mem = MaybeUninit::::uninit(); - $crate::ptr::copy_nonoverlapping( - $ptr.cast::<$elem>(), - mem.as_mut_ptr().cast::<$elem>(), - $lanes * 3, - ); - let w = mem.assume_init(); + // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), + // passing a pointer that is not sufficiently aligned is an UB. + let arr: Arr = $crate::ptr::read_unaligned($ptr as *const Arr); + + type W = Simd<$elem, { $lanes * 4 }>; + let mut tmp = [0 as $elem; { $lanes * 4 }]; + tmp[..$lanes * 3].copy_from_slice(&arr); + let w: W = W::from_array(tmp); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>()); let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>()); @@ -291,6 +293,7 @@ macro_rules! deinterleaving_load { transmute((v0, v1, v2)) }}; + // N = 4 ($elem:ty, $lanes:literal, 4, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; @@ -345,16 +348,22 @@ macro_rules! interleaving_store { let v2v2: Simd<$elem, { $lanes * 2 }> = simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>()); - type W = Simd<$elem, { $lanes * 3 }>; - // NOTE: repr(simd) adds padding to make the total size a power of two. // Hence writing W to ptr might write out of bounds. + + type W = Simd<$elem, { $lanes * 3 }>; + let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>()); - $crate::ptr::copy_nonoverlapping( - (&w as *const W).cast::<$elem>(), - $ptr.cast::<$elem>(), - $lanes * 3, - ); + + let arr: [$elem; { $lanes * 3 }] = $crate::mem::transmute_copy(&w); + + // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), + // passing a pointer that is not sufficiently aligned is an UB. + let mut i = 0; + while i < $lanes * 3 { + $crate::ptr::write_unaligned(($ptr as *mut $elem).add(i), arr[i]); + i += 1; + } }}; // N = 4 From b9a40e59b2dfbe95368c50b041a955ecf8cf7b6e Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Sun, 31 May 2026 14:18:29 +0100 Subject: [PATCH 2/4] Update crates/core_arch/src/macros.rs Co-authored-by: Folkert de Vries --- crates/core_arch/src/macros.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 3b931e661d..005d426135 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -350,7 +350,6 @@ macro_rules! interleaving_store { // NOTE: repr(simd) adds padding to make the total size a power of two. // Hence writing W to ptr might write out of bounds. - type W = Simd<$elem, { $lanes * 3 }>; let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>()); From 3fdc05af7b867cbb55ac6b87a6ce4b6c67e7cb2d Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Sun, 31 May 2026 14:23:10 +0100 Subject: [PATCH 3/4] Removes alignment requirement in st3 --- crates/core_arch/src/macros.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 005d426135..6e36909095 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -272,19 +272,15 @@ macro_rules! deinterleaving_load { use $crate::core_arch::simd::Simd; use $crate::mem::transmute; - // NOTE: repr(simd) adds padding to make the total size a power of two. - // Hence writing W to ptr might write out of bounds. type V = Simd<$elem, $lanes>; type Arr = [$elem; { $lanes * 3 }]; // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), // passing a pointer that is not sufficiently aligned is an UB. - let arr: Arr = $crate::ptr::read_unaligned($ptr as *const Arr); - - type W = Simd<$elem, { $lanes * 4 }>; - let mut tmp = [0 as $elem; { $lanes * 4 }]; - tmp[..$lanes * 3].copy_from_slice(&arr); - let w: W = W::from_array(tmp); + let arr: Arr = std::array::from_fn(|x| $crate::ptr::read_unaligned(($ptr as *const $elem).add(x))); + // NOTE: repr(simd) adds padding to make the total size a power of two. + // Hence writing W to ptr might write out of bounds. + let w: W = W::from_array(arr); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>()); let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>()); From fc7a454d5877d8958c7c93abeb28f61228555bfe Mon Sep 17 00:00:00 2001 From: Radzivon Bartoshyk Date: Sun, 31 May 2026 14:46:34 +0100 Subject: [PATCH 4/4] Removes alignment requirement in st3 --- crates/core_arch/src/macros.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 6e36909095..6a2b62cf30 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -277,9 +277,10 @@ macro_rules! deinterleaving_load { // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), // passing a pointer that is not sufficiently aligned is an UB. - let arr: Arr = std::array::from_fn(|x| $crate::ptr::read_unaligned(($ptr as *const $elem).add(x))); + let arr: Arr = $crate::ptr::read_unaligned($ptr as *const [$elem; { $lanes * 3 }]); // NOTE: repr(simd) adds padding to make the total size a power of two. - // Hence writing W to ptr might write out of bounds. + // Hence reading a W from ptr might read out of bounds. + type W = Simd<$elem, { $lanes * 3 }>; let w: W = W::from_array(arr); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>()); @@ -354,11 +355,7 @@ macro_rules! interleaving_store { // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(), // passing a pointer that is not sufficiently aligned is an UB. - let mut i = 0; - while i < $lanes * 3 { - $crate::ptr::write_unaligned(($ptr as *mut $elem).add(i), arr[i]); - i += 1; - } + $ptr.cast::<[$elem; { $lanes * 3 }]>().write_unaligned(arr); }}; // N = 4