Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 18 additions & 17 deletions crates/core_arch/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,23 +266,22 @@ macro_rules! deinterleaving_load {
transmute((v0, v1))
}};

// N = 3
($elem:ty, $lanes:literal, 3, $ptr:expr) => {{
use $crate::core_arch::macros::deinterleave_mask;
use $crate::core_arch::simd::Simd;
use $crate::mem::{MaybeUninit, transmute};
use $crate::mem::transmute;

type V = Simd<$elem, $lanes>;
type W = Simd<$elem, { $lanes * 3 }>;
type Arr = [$elem; { $lanes * 3 }];

// NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(),
// passing a pointer that is not sufficiently aligned is an UB.
Comment thread
awxkee marked this conversation as resolved.
let arr: Arr = $crate::ptr::read_unaligned($ptr as *const [$elem; { $lanes * 3 }]);
// NOTE: repr(simd) adds padding to make the total size a power of two.
// Hence reading W from ptr might read out of bounds.
let mut mem = MaybeUninit::<W>::uninit();
$crate::ptr::copy_nonoverlapping(
$ptr.cast::<$elem>(),
mem.as_mut_ptr().cast::<$elem>(),
$lanes * 3,
);
let w = mem.assume_init();
// Hence reading a W from ptr might read out of bounds.
type W = Simd<$elem, { $lanes * 3 }>;
let w: W = W::from_array(arr);

let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>());
let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>());
Expand All @@ -291,6 +290,7 @@ macro_rules! deinterleaving_load {
transmute((v0, v1, v2))
}};

// N = 4
($elem:ty, $lanes:literal, 4, $ptr:expr) => {{
use $crate::core_arch::macros::deinterleave_mask;
use $crate::core_arch::simd::Simd;
Expand Down Expand Up @@ -345,16 +345,17 @@ macro_rules! interleaving_store {
let v2v2: Simd<$elem, { $lanes * 2 }> =
simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());

type W = Simd<$elem, { $lanes * 3 }>;

// NOTE: repr(simd) adds padding to make the total size a power of two.
// Hence writing W to ptr might write out of bounds.
type W = Simd<$elem, { $lanes * 3 }>;
Comment thread
awxkee marked this conversation as resolved.

let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
$crate::ptr::copy_nonoverlapping(
(&w as *const W).cast::<$elem>(),
$ptr.cast::<$elem>(),
$lanes * 3,
);

let arr: [$elem; { $lanes * 3 }] = $crate::mem::transmute_copy(&w);

// NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(),
// passing a pointer that is not sufficiently aligned is an UB.
Comment thread
awxkee marked this conversation as resolved.
$ptr.cast::<[$elem; { $lanes * 3 }]>().write_unaligned(arr);
}};

// N = 4
Expand Down
Loading