From 665351db62a5b30768d8a6929fed51eab8332f5c Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 28 May 2026 12:12:42 +0800 Subject: [PATCH 1/4] loongarch: Use `intrinsics::simd` for v{ld,st}[x] --- .../src/loongarch64/lasx/generated.rs | 40 --------- .../src/loongarch64/lasx/portable.rs | 8 ++ .../src/loongarch64/lsx/generated.rs | 40 --------- .../core_arch/src/loongarch64/lsx/portable.rs | 8 ++ crates/core_arch/src/loongarch64/simd.rs | 86 +++++++++++++++++++ crates/stdarch-gen-loongarch/lasx.spec | 4 + crates/stdarch-gen-loongarch/lsx.spec | 4 + .../src/portable-intrinsics.txt | 8 ++ 8 files changed, 118 insertions(+), 80 deletions(-) diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index 6a6a3ae924..6c0934b01d 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -455,10 +455,6 @@ unsafe extern "unadjusted" { fn __lasx_xvfrintrm_s(a: __v8f32) -> __v8f32; #[link_name = "llvm.loongarch.lasx.xvfrintrm.d"] fn __lasx_xvfrintrm_d(a: __v4f64) -> __v4f64; - #[link_name = "llvm.loongarch.lasx.xvld"] - fn __lasx_xvld(a: *const i8, b: i32) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvst"] - fn __lasx_xvst(a: __v32i8, b: *mut i8, c: i32); #[link_name = "llvm.loongarch.lasx.xvstelm.b"] fn __lasx_xvstelm_b(a: __v32i8, b: *mut i8, c: i32, d: u32); #[link_name = "llvm.loongarch.lasx.xvstelm.h"] @@ -489,10 +485,6 @@ unsafe extern "unadjusted" { fn __lasx_xvssrln_w_d(a: __v4i64, b: __v4i64) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvldi"] fn __lasx_xvldi(a: i32) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvldx"] - fn __lasx_xvldx(a: *const i8, b: i64) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvstx"] - fn __lasx_xvstx(a: __v32i8, b: *mut i8, c: i64); #[link_name = "llvm.loongarch.lasx.xvextl.qu.du"] fn __lasx_xvextl_qu_du(a: __v4u64) -> __v4u64; #[link_name = "llvm.loongarch.lasx.vext2xv.h.b"] @@ -2665,24 +2657,6 @@ pub fn lasx_xvfrintrm_d(a: m256d) -> m256d { unsafe { transmute(__lasx_xvfrintrm_d(transmute(a))) } } -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lasx_xvld(mem_addr: *const i8) -> m256i { - static_assert_simm_bits!(IMM_S12, 12); - transmute(__lasx_xvld(mem_addr, IMM_S12)) -} - -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lasx_xvst(a: m256i, mem_addr: *mut i8) { - static_assert_simm_bits!(IMM_S12, 12); - __lasx_xvst(transmute(a), mem_addr, IMM_S12) -} - #[inline] #[target_feature(enable = "lasx")] #[rustc_legacy_const_generics(2, 3)] @@ -2810,20 +2784,6 @@ pub fn lasx_xvldi() -> m256i { unsafe { transmute(__lasx_xvldi(IMM_S13)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lasx_xvldx(mem_addr: *const i8, b: i64) -> m256i { - transmute(__lasx_xvldx(mem_addr, transmute(b))) -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lasx_xvstx(a: m256i, mem_addr: *mut i8, b: i64) { - __lasx_xvstx(transmute(a), mem_addr, transmute(b)) -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index 4dfe80795f..b6f4fdcb25 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -339,6 +339,10 @@ impl_gv!("lasx", lasx_xvreplgr2vr_h, ls::simd_splat, m256i, i16x16, i32); impl_gv!("lasx", lasx_xvreplgr2vr_w, ls::simd_splat, m256i, i32x8, i32); impl_gv!("lasx", lasx_xvreplgr2vr_d, ls::simd_splat, m256i, i64x4, i64); +impl_ggv!("lasx", lasx_xvldx, simd_ldx, m256i, i8x32, *const i8, i64, unsafe); + +impl_gsv!("lasx", lasx_xvld, simd_ld, m256i, i8x32, *const i8, 12, const, unsafe); + impl_sv!("lasx", lasx_xvrepli_b, ls::simd_splat, m256i, i8x32, 10); impl_sv!("lasx", lasx_xvrepli_h, ls::simd_splat, m256i, i16x16, 10); impl_sv!("lasx", lasx_xvrepli_w, ls::simd_splat, m256i, i32x8, 10); @@ -499,6 +503,10 @@ impl_vvv!("lasx", lasx_xvpackod_h, simd_packod_h, m256i, i16x16); impl_vvv!("lasx", lasx_xvpackod_w, simd_packod_w, m256i, i32x8); impl_vvv!("lasx", lasx_xvpackod_d, simd_packod_d, m256i, i64x4); +impl_vgg!("lasx", lasx_xvstx, simd_stx, m256i, i8x32, *mut i8, i64, unsafe); + +impl_vgs!("lasx", lasx_xvst, simd_st, m256i, i8x32, *mut i8, 12, const, unsafe); + impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32); impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16); impl_vuv!("lasx", lasx_xvslli_w, is::simd_shl, m256i, i32x8); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index 555866040e..fc79ce3fe6 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -789,10 +789,6 @@ unsafe extern "unadjusted" { fn __lsx_vssrarni_du_q(a: __v2u64, b: __v2i64, c: u32) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vpermi.w"] fn __lsx_vpermi_w(a: __v4i32, b: __v4i32, c: u32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vld"] - fn __lsx_vld(a: *const i8, b: i32) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vst"] - fn __lsx_vst(a: __v16i8, b: *mut i8, c: i32); #[link_name = "llvm.loongarch.lsx.vssrlrn.b.h"] fn __lsx_vssrlrn_b_h(a: __v8i16, b: __v8i16) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vssrlrn.h.w"] @@ -809,10 +805,6 @@ unsafe extern "unadjusted" { fn __lsx_vldi(a: i32) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vshuf.b"] fn __lsx_vshuf_b(a: __v16i8, b: __v16i8, c: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vldx"] - fn __lsx_vldx(a: *const i8, b: i64) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vstx"] - fn __lsx_vstx(a: __v16i8, b: *mut i8, c: i64); #[link_name = "llvm.loongarch.lsx.vextl.qu.du"] fn __lsx_vextl_qu_du(a: __v2u64) -> __v2u64; #[link_name = "llvm.loongarch.lsx.bnz.b"] @@ -3876,24 +3868,6 @@ pub fn lsx_vpermi_w(a: m128i, b: m128i) -> m128i { unsafe { transmute(__lsx_vpermi_w(transmute(a), transmute(b), IMM8)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lsx_vld(mem_addr: *const i8) -> m128i { - static_assert_simm_bits!(IMM_S12, 12); - transmute(__lsx_vld(mem_addr, IMM_S12)) -} - -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lsx_vst(a: m128i, mem_addr: *mut i8) { - static_assert_simm_bits!(IMM_S12, 12); - __lsx_vst(transmute(a), mem_addr, IMM_S12) -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -3952,20 +3926,6 @@ pub fn lsx_vshuf_b(a: m128i, b: m128i, c: m128i) -> m128i { unsafe { transmute(__lsx_vshuf_b(transmute(a), transmute(b), transmute(c))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lsx_vldx(mem_addr: *const i8, b: i64) -> m128i { - transmute(__lsx_vldx(mem_addr, transmute(b))) -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub unsafe fn lsx_vstx(a: m128i, mem_addr: *mut i8, b: i64) { - __lsx_vstx(transmute(a), mem_addr, transmute(b)) -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index 0b0df11bbf..b7a21bc3fe 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -233,6 +233,10 @@ impl_gv!("lsx", lsx_vreplgr2vr_h, ls::simd_splat, m128i, i16x8, i32); impl_gv!("lsx", lsx_vreplgr2vr_w, ls::simd_splat, m128i, i32x4, i32); impl_gv!("lsx", lsx_vreplgr2vr_d, ls::simd_splat, m128i, i64x2, i64); +impl_ggv!("lsx", lsx_vldx, simd_ldx, m128i, i8x16, *const i8, i64, unsafe); + +impl_gsv!("lsx", lsx_vld, simd_ld, m128i, i8x16, *const i8, 12, const, unsafe); + impl_sv!("lsx", lsx_vrepli_b, ls::simd_splat, m128i, i8x16, 10); impl_sv!("lsx", lsx_vrepli_h, ls::simd_splat, m128i, i16x8, 10); impl_sv!("lsx", lsx_vrepli_w, ls::simd_splat, m128i, i32x4, 10); @@ -393,6 +397,10 @@ impl_vvv!("lsx", lsx_vpackod_h, simd_packod_h, m128i, i16x8); impl_vvv!("lsx", lsx_vpackod_w, simd_packod_w, m128i, i32x4); impl_vvv!("lsx", lsx_vpackod_d, simd_packod_d, m128i, i64x2); +impl_vgg!("lsx", lsx_vstx, simd_stx, m128i, i8x16, *mut i8, i64, unsafe); + +impl_vgs!("lsx", lsx_vst, simd_st, m128i, i8x16, *mut i8, 12, const, unsafe); + impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16); impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8); impl_vuv!("lsx", lsx_vslli_w, is::simd_shl, m128i, i32x4); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index 2c4a0f8493..7ec670c54a 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -108,6 +108,20 @@ pub(super) const unsafe fn simd_fnmsub(a: T, b: T, c: T) -> T { is::simd_neg(ls::simd_fmsub(a, b, c)) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_ld(a: *const i8) -> T { + let a = a.offset(I as isize) as *const T; + core::ptr::read_unaligned(a) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_ldx(a: *const i8, b: i64) -> T { + let a = a.offset(b as isize) as *const T; + core::ptr::read_unaligned(a) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_madd(a: T, b: T, c: T) -> T { @@ -158,6 +172,20 @@ pub(super) const unsafe fn simd_splat(a: i64) -> T { T::splat(a) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_st(a: T, b: *mut i8) { + let b = b.offset(I as isize) as *mut T; + core::ptr::write_unaligned(b, a); +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_stx(a: T, b: *mut i8, c: i64) { + let b = b.offset(c as isize) as *mut T; + core::ptr::write_unaligned(b, a); +} + macro_rules! impl_vv { ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => { #[inline] @@ -191,6 +219,36 @@ macro_rules! impl_gv { pub(super) use impl_gv; +macro_rules! impl_ggv { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $xty:ty, unsafe) => { + #[inline] + #[target_feature(enable = $ft)] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub unsafe fn $name(a: $gty, b: $xty) -> $oty { + let r: $ity = $op(a, b); + transmute(r) + } + }; +} + +pub(super) use impl_ggv; + +macro_rules! impl_gsv { + ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $gty:ty, $ibs:expr, const, unsafe) => { + #[inline] + #[target_feature(enable = $ft)] + #[rustc_legacy_const_generics(1)] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub unsafe fn $name(a: $gty) -> $oty { + static_assert_simm_bits!(IMM, $ibs); + let r: $ity = $op::(a); + transmute(r) + } + }; +} + +pub(super) use impl_gsv; + macro_rules! impl_sv { ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => { #[inline] @@ -227,6 +285,34 @@ macro_rules! impl_vvv { pub(super) use impl_vvv; +macro_rules! impl_vgg { + ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $xty:ty, unsafe) => { + #[inline] + #[target_feature(enable = $ft)] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub unsafe fn $name(a: $oty, b: $gty, c: $xty) { + $op(a, b, c); + } + }; +} + +pub(super) use impl_vgg; + +macro_rules! impl_vgs { + ($ft:literal, $name:ident, $op:ident, $oty:ty, $ity:ident, $gty:ty, $ibs:expr, const, unsafe) => { + #[inline] + #[target_feature(enable = $ft)] + #[rustc_legacy_const_generics(2)] + #[unstable(feature = "stdarch_loongarch", issue = "117427")] + pub unsafe fn $name(a: $oty, b: $gty) { + static_assert_simm_bits!(IMM, $ibs); + $op::(a, b); + } + }; +} + +pub(super) use impl_vgs; + macro_rules! impl_vuv { ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident) => { #[inline] diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index 41432adf25..1a9710fda8 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -2590,11 +2590,13 @@ asm-fmts = xd, xj data-types = V4DF, V4DF /// lasx_xvld +impl = portable name = lasx_xvld asm-fmts = xd, rj, si12 data-types = V32QI, CVPOINTER, SI /// lasx_xvst +impl = portable name = lasx_xvst asm-fmts = xd, rj, si12 data-types = VOID, V32QI, CVPOINTER, SI @@ -2681,11 +2683,13 @@ asm-fmts = xd, i13 data-types = V4DI, HI /// lasx_xvldx +impl = portable name = lasx_xvldx asm-fmts = xd, rj, rk data-types = V32QI, CVPOINTER, DI /// lasx_xvstx +impl = portable name = lasx_xvstx asm-fmts = xd, rj, rk data-types = VOID, V32QI, CVPOINTER, DI diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index 211c3c0fcf..158db20263 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -3503,11 +3503,13 @@ asm-fmts = vd, vj, ui8 data-types = V4SI, V4SI, V4SI, USI /// lsx_vld +impl = portable name = lsx_vld asm-fmts = vd, rj, si12 data-types = V16QI, CVPOINTER, SI /// lsx_vst +impl = portable name = lsx_vst asm-fmts = vd, rj, si12 data-types = VOID, V16QI, CVPOINTER, SI @@ -3559,11 +3561,13 @@ asm-fmts = vd, vj, vk, va data-types = V16QI, V16QI, V16QI, V16QI /// lsx_vldx +impl = portable name = lsx_vldx asm-fmts = vd, rj, rk data-types = V16QI, CVPOINTER, DI /// lsx_vstx +impl = portable name = lsx_vstx asm-fmts = vd, rj, rk data-types = VOID, V16QI, CVPOINTER, DI diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index abbfcb3365..495ed916f5 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -263,6 +263,10 @@ lsx_vshuf4i_b lsx_vshuf4i_h lsx_vshuf4i_w lsx_vshuf4i_d +lsx_vld +lsx_vst +lsx_vldx +lsx_vstx # LASX intrinsics lasx_xvsll_b @@ -527,3 +531,7 @@ lasx_xvpackod_d lasx_xvshuf4i_b lasx_xvshuf4i_h lasx_xvshuf4i_w +lasx_xvld +lasx_xvst +lasx_xvldx +lasx_xvstx From a793393871529d6766c7079bc18daa4c478324d5 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 2 Jun 2026 11:38:51 +0800 Subject: [PATCH 2/4] loongarch: Use `intrinsics::simd` for vrotr[i] --- .../src/loongarch64/lasx/generated.rs | 80 ------------------- .../src/loongarch64/lasx/portable.rs | 8 ++ .../src/loongarch64/lsx/generated.rs | 80 ------------------- .../core_arch/src/loongarch64/lsx/portable.rs | 8 ++ crates/core_arch/src/loongarch64/simd.rs | 9 +++ crates/stdarch-gen-loongarch/lasx.spec | 8 ++ crates/stdarch-gen-loongarch/lsx.spec | 8 ++ .../src/portable-intrinsics.txt | 16 ++++ 8 files changed, 57 insertions(+), 160 deletions(-) diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index 6c0934b01d..f71f462520 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -701,14 +701,6 @@ unsafe extern "unadjusted" { fn __lasx_xvmaddwod_w_hu_h(a: __v8i32, b: __v16u16, c: __v16i16) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvmaddwod.h.bu.b"] fn __lasx_xvmaddwod_h_bu_b(a: __v16i16, b: __v32u8, c: __v32i8) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvrotr.b"] - fn __lasx_xvrotr_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvrotr.h"] - fn __lasx_xvrotr_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvrotr.w"] - fn __lasx_xvrotr_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvrotr.d"] - fn __lasx_xvrotr_d(a: __v4i64, b: __v4i64) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvadd.q"] fn __lasx_xvadd_q(a: __v4i64, b: __v4i64) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvsub.q"] @@ -741,14 +733,6 @@ unsafe extern "unadjusted" { fn __lasx_xvexth_du_wu(a: __v8u32) -> __v4u64; #[link_name = "llvm.loongarch.lasx.xvexth.qu.du"] fn __lasx_xvexth_qu_du(a: __v4u64) -> __v4u64; - #[link_name = "llvm.loongarch.lasx.xvrotri.b"] - fn __lasx_xvrotri_b(a: __v32i8, b: u32) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvrotri.h"] - fn __lasx_xvrotri_h(a: __v16i16, b: u32) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvrotri.w"] - fn __lasx_xvrotri_w(a: __v8i32, b: u32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvrotri.d"] - fn __lasx_xvrotri_d(a: __v4i64, b: u32) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvextl.q.d"] fn __lasx_xvextl_q_d(a: __v4i64) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvsrlni.b.h"] @@ -3552,34 +3536,6 @@ pub fn lasx_xvmaddwod_h_bu_b(a: m256i, b: m256i, c: m256i) -> m256i { unsafe { transmute(__lasx_xvmaddwod_h_bu_b(transmute(a), transmute(b), transmute(c))) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotr_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvrotr_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotr_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvrotr_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotr_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvrotr_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotr_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvrotr_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -3692,42 +3648,6 @@ pub fn lasx_xvexth_qu_du(a: m256i) -> m256i { unsafe { transmute(__lasx_xvexth_qu_du(transmute(a))) } } -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotri_b(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM3, 3); - unsafe { transmute(__lasx_xvrotri_b(transmute(a), IMM3)) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotri_h(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM4, 4); - unsafe { transmute(__lasx_xvrotri_h(transmute(a), IMM4)) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotri_w(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM5, 5); - unsafe { transmute(__lasx_xvrotri_w(transmute(a), IMM5)) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvrotri_d(a: m256i) -> m256i { - static_assert_uimm_bits!(IMM6, 6); - unsafe { transmute(__lasx_xvrotri_d(transmute(a), IMM6)) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index b6f4fdcb25..79211632d7 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -438,6 +438,10 @@ impl_vvv!("lasx", lasx_xvsrl_b, ls::simd_shr, m256i, u8x32); impl_vvv!("lasx", lasx_xvsrl_h, ls::simd_shr, m256i, u16x16); impl_vvv!("lasx", lasx_xvsrl_w, ls::simd_shr, m256i, u32x8); impl_vvv!("lasx", lasx_xvsrl_d, ls::simd_shr, m256i, u64x4); +impl_vvv!("lasx", lasx_xvrotr_b, ls::simd_rotr, m256i, u8x32); +impl_vvv!("lasx", lasx_xvrotr_h, ls::simd_rotr, m256i, u16x16); +impl_vvv!("lasx", lasx_xvrotr_w, ls::simd_rotr, m256i, u32x8); +impl_vvv!("lasx", lasx_xvrotr_d, ls::simd_rotr, m256i, u64x4); impl_vvv!("lasx", lasx_xvbitclr_b, ls::simd_bitclr, m256i, u8x32); impl_vvv!("lasx", lasx_xvbitclr_h, ls::simd_bitclr, m256i, u16x16); impl_vvv!("lasx", lasx_xvbitclr_w, ls::simd_bitclr, m256i, u32x8); @@ -519,6 +523,10 @@ impl_vuv!("lasx", lasx_xvsrli_b, is::simd_shr, m256i, u8x32); impl_vuv!("lasx", lasx_xvsrli_h, is::simd_shr, m256i, u16x16); impl_vuv!("lasx", lasx_xvsrli_w, is::simd_shr, m256i, u32x8); impl_vuv!("lasx", lasx_xvsrli_d, is::simd_shr, m256i, u64x4); +impl_vuv!("lasx", lasx_xvrotri_b, ls::simd_rotr, m256i, u8x32); +impl_vuv!("lasx", lasx_xvrotri_h, ls::simd_rotr, m256i, u16x16); +impl_vuv!("lasx", lasx_xvrotri_w, ls::simd_rotr, m256i, u32x8); +impl_vuv!("lasx", lasx_xvrotri_d, ls::simd_rotr, m256i, u64x4); impl_vuv!("lasx", lasx_xvaddi_bu, is::simd_add, m256i, u8x32, 5); impl_vuv!("lasx", lasx_xvaddi_hu, is::simd_add, m256i, u16x16, 5); impl_vuv!("lasx", lasx_xvaddi_wu, is::simd_add, m256i, u32x8, 5); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index fc79ce3fe6..5012ed8338 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -641,14 +641,6 @@ unsafe extern "unadjusted" { fn __lsx_vmaddwev_q_du_d(a: __v2i64, b: __v2u64, c: __v2i64) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vmaddwod.q.du.d"] fn __lsx_vmaddwod_q_du_d(a: __v2i64, b: __v2u64, c: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vrotr.b"] - fn __lsx_vrotr_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vrotr.h"] - fn __lsx_vrotr_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vrotr.w"] - fn __lsx_vrotr_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vrotr.d"] - fn __lsx_vrotr_d(a: __v2i64, b: __v2i64) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vadd.q"] fn __lsx_vadd_q(a: __v2i64, b: __v2i64) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vsub.q"] @@ -681,14 +673,6 @@ unsafe extern "unadjusted" { fn __lsx_vexth_du_wu(a: __v4u32) -> __v2u64; #[link_name = "llvm.loongarch.lsx.vexth.qu.du"] fn __lsx_vexth_qu_du(a: __v2u64) -> __v2u64; - #[link_name = "llvm.loongarch.lsx.vrotri.b"] - fn __lsx_vrotri_b(a: __v16i8, b: u32) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vrotri.h"] - fn __lsx_vrotri_h(a: __v8i16, b: u32) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vrotri.w"] - fn __lsx_vrotri_w(a: __v4i32, b: u32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vrotri.d"] - fn __lsx_vrotri_d(a: __v2i64, b: u32) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vextl.q.d"] fn __lsx_vextl_q_d(a: __v2i64) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vsrlni.b.h"] @@ -3236,34 +3220,6 @@ pub fn lsx_vmaddwod_q_du_d(a: m128i, b: m128i, c: m128i) -> m128i { unsafe { transmute(__lsx_vmaddwod_q_du_d(transmute(a), transmute(b), transmute(c))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotr_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vrotr_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotr_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vrotr_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotr_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vrotr_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotr_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vrotr_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -3384,42 +3340,6 @@ pub fn lsx_vexth_qu_du(a: m128i) -> m128i { unsafe { transmute(__lsx_vexth_qu_du(transmute(a))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotri_b(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM3, 3); - unsafe { transmute(__lsx_vrotri_b(transmute(a), IMM3)) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotri_h(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM4, 4); - unsafe { transmute(__lsx_vrotri_h(transmute(a), IMM4)) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotri_w(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM5, 5); - unsafe { transmute(__lsx_vrotri_w(transmute(a), IMM5)) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vrotri_d(a: m128i) -> m128i { - static_assert_uimm_bits!(IMM6, 6); - unsafe { transmute(__lsx_vrotri_d(transmute(a), IMM6)) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index b7a21bc3fe..3cf38b53e0 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -332,6 +332,10 @@ impl_vvv!("lsx", lsx_vsrl_b, ls::simd_shr, m128i, u8x16); impl_vvv!("lsx", lsx_vsrl_h, ls::simd_shr, m128i, u16x8); impl_vvv!("lsx", lsx_vsrl_w, ls::simd_shr, m128i, u32x4); impl_vvv!("lsx", lsx_vsrl_d, ls::simd_shr, m128i, u64x2); +impl_vvv!("lsx", lsx_vrotr_b, ls::simd_rotr, m128i, u8x16); +impl_vvv!("lsx", lsx_vrotr_h, ls::simd_rotr, m128i, u16x8); +impl_vvv!("lsx", lsx_vrotr_w, ls::simd_rotr, m128i, u32x4); +impl_vvv!("lsx", lsx_vrotr_d, ls::simd_rotr, m128i, u64x2); impl_vvv!("lsx", lsx_vbitclr_b, ls::simd_bitclr, m128i, u8x16); impl_vvv!("lsx", lsx_vbitclr_h, ls::simd_bitclr, m128i, u16x8); impl_vvv!("lsx", lsx_vbitclr_w, ls::simd_bitclr, m128i, u32x4); @@ -413,6 +417,10 @@ impl_vuv!("lsx", lsx_vsrli_b, is::simd_shr, m128i, u8x16); impl_vuv!("lsx", lsx_vsrli_h, is::simd_shr, m128i, u16x8); impl_vuv!("lsx", lsx_vsrli_w, is::simd_shr, m128i, u32x4); impl_vuv!("lsx", lsx_vsrli_d, is::simd_shr, m128i, u64x2); +impl_vuv!("lsx", lsx_vrotri_b, ls::simd_rotr, m128i, u8x16); +impl_vuv!("lsx", lsx_vrotri_h, ls::simd_rotr, m128i, u16x8); +impl_vuv!("lsx", lsx_vrotri_w, ls::simd_rotr, m128i, u32x4); +impl_vuv!("lsx", lsx_vrotri_d, ls::simd_rotr, m128i, u64x2); impl_vuv!("lsx", lsx_vaddi_bu, is::simd_add, m128i, u8x16, 5); impl_vuv!("lsx", lsx_vaddi_hu, is::simd_add, m128i, u16x8, 5); impl_vuv!("lsx", lsx_vaddi_wu, is::simd_add, m128i, u32x4, 5); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index 7ec670c54a..ad443af61c 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -152,6 +152,15 @@ pub(super) const unsafe fn simd_orn(a: T, b: T) -> T { is::simd_or(a, ls::simd_not(b)) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_rotr(a: T, b: T) -> T { + let m = (size_of::() * 8 - 1) as i64; + let r = is::simd_and(b, ls::simd_splat(m)); + let l = is::simd_and(is::simd_sub(ls::simd_splat(m + 1), r), ls::simd_splat(m)); + is::simd_or(is::simd_shr(a, r), is::simd_shl(a, l)) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_shl(a: T, b: T) -> T { diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index 1a9710fda8..e822330bdf 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -3301,21 +3301,25 @@ asm-fmts = xd, xj, xk data-types = V16HI, V16HI, UV32QI, V32QI /// lasx_xvrotr_b +impl = portable name = lasx_xvrotr_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvrotr_h +impl = portable name = lasx_xvrotr_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvrotr_w +impl = portable name = lasx_xvrotr_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvrotr_d +impl = portable name = lasx_xvrotr_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI @@ -3401,21 +3405,25 @@ asm-fmts = xd, xj data-types = UV4DI, UV4DI /// lasx_xvrotri_b +impl = portable name = lasx_xvrotri_b asm-fmts = xd, xj, ui3 data-types = V32QI, V32QI, UQI /// lasx_xvrotri_h +impl = portable name = lasx_xvrotri_h asm-fmts = xd, xj, ui4 data-types = V16HI, V16HI, UQI /// lasx_xvrotri_w +impl = portable name = lasx_xvrotri_w asm-fmts = xd, xj, ui5 data-types = V8SI, V8SI, UQI /// lasx_xvrotri_d +impl = portable name = lasx_xvrotri_d asm-fmts = xd, xj, ui6 data-types = V4DI, V4DI, UQI diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index 158db20263..e319daa1cb 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -3133,21 +3133,25 @@ asm-fmts = vd, vj, vk data-types = V2DI, V2DI, UV2DI, V2DI /// lsx_vrotr_b +impl = portable name = lsx_vrotr_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vrotr_h +impl = portable name = lsx_vrotr_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vrotr_w +impl = portable name = lsx_vrotr_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vrotr_d +impl = portable name = lsx_vrotr_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI @@ -3233,21 +3237,25 @@ asm-fmts = vd, vj data-types = UV2DI, UV2DI /// lsx_vrotri_b +impl = portable name = lsx_vrotri_b asm-fmts = vd, vj, ui3 data-types = V16QI, V16QI, UQI /// lsx_vrotri_h +impl = portable name = lsx_vrotri_h asm-fmts = vd, vj, ui4 data-types = V8HI, V8HI, UQI /// lsx_vrotri_w +impl = portable name = lsx_vrotri_w asm-fmts = vd, vj, ui5 data-types = V4SI, V4SI, UQI /// lsx_vrotri_d +impl = portable name = lsx_vrotri_d asm-fmts = vd, vj, ui6 data-types = V2DI, V2DI, UQI diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index 495ed916f5..db86381aee 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -267,6 +267,14 @@ lsx_vld lsx_vst lsx_vldx lsx_vstx +lsx_vrotr_b +lsx_vrotr_h +lsx_vrotr_w +lsx_vrotr_d +lsx_vrotri_b +lsx_vrotri_h +lsx_vrotri_w +lsx_vrotri_d # LASX intrinsics lasx_xvsll_b @@ -535,3 +543,11 @@ lasx_xvld lasx_xvst lasx_xvldx lasx_xvstx +lasx_xvrotr_b +lasx_xvrotr_h +lasx_xvrotr_w +lasx_xvrotr_d +lasx_xvrotri_b +lasx_xvrotri_h +lasx_xvrotri_w +lasx_xvrotri_d From 4cde79bd4be13ab27fa0cc9e875513487d6e1d84 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 2 Jun 2026 16:58:36 +0800 Subject: [PATCH 3/4] loongarch: Use `intrinsics::simd` for vfrecip --- .../src/loongarch64/lasx/generated.rs | 18 ------------------ .../core_arch/src/loongarch64/lasx/portable.rs | 2 ++ .../core_arch/src/loongarch64/lsx/generated.rs | 18 ------------------ .../core_arch/src/loongarch64/lsx/portable.rs | 2 ++ crates/core_arch/src/loongarch64/simd.rs | 12 ++++++++++++ crates/stdarch-gen-loongarch/lasx.spec | 2 ++ crates/stdarch-gen-loongarch/lsx.spec | 2 ++ .../src/portable-intrinsics.txt | 4 ++++ 8 files changed, 24 insertions(+), 36 deletions(-) diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index f71f462520..5a4d71207d 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -199,10 +199,6 @@ unsafe extern "unadjusted" { fn __lasx_xvfclass_s(a: __v8f32) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvfclass.d"] fn __lasx_xvfclass_d(a: __v4f64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvfrecip.s"] - fn __lasx_xvfrecip_s(a: __v8f32) -> __v8f32; - #[link_name = "llvm.loongarch.lasx.xvfrecip.d"] - fn __lasx_xvfrecip_d(a: __v4f64) -> __v4f64; #[link_name = "llvm.loongarch.lasx.xvfrecipe.s"] fn __lasx_xvfrecipe_s(a: __v8f32) -> __v8f32; #[link_name = "llvm.loongarch.lasx.xvfrecipe.d"] @@ -1713,20 +1709,6 @@ pub fn lasx_xvfclass_d(a: m256d) -> m256i { unsafe { transmute(__lasx_xvfclass_d(transmute(a))) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvfrecip_s(a: m256) -> m256 { - unsafe { transmute(__lasx_xvfrecip_s(transmute(a))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvfrecip_d(a: m256d) -> m256d { - unsafe { transmute(__lasx_xvfrecip_d(transmute(a))) } -} - #[inline] #[target_feature(enable = "lasx,frecipe")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index 79211632d7..560c196a2a 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -328,6 +328,8 @@ impl_vv!("lasx", lasx_xvneg_w, is::simd_neg, m256i, i32x8); impl_vv!("lasx", lasx_xvneg_d, is::simd_neg, m256i, i64x4); impl_vv!("lasx", lasx_xvfsqrt_s, is::simd_fsqrt, m256, f32x8); impl_vv!("lasx", lasx_xvfsqrt_d, is::simd_fsqrt, m256d, f64x4); +impl_vv!("lasx", lasx_xvfrecip_s, ls::simd_frecip_s, m256, f32x8); +impl_vv!("lasx", lasx_xvfrecip_d, ls::simd_frecip_d, m256d, f64x4); impl_vv!("lasx", lasx_xvreplve0_b, simd_replve0_b, m256i, i8x32); impl_vv!("lasx", lasx_xvreplve0_h, simd_replve0_h, m256i, i16x16); impl_vv!("lasx", lasx_xvreplve0_w, simd_replve0_w, m256i, i32x8); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index 5012ed8338..8fa39dbdfd 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -205,10 +205,6 @@ unsafe extern "unadjusted" { fn __lsx_vfclass_s(a: __v4f32) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vfclass.d"] fn __lsx_vfclass_d(a: __v2f64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vfrecip.s"] - fn __lsx_vfrecip_s(a: __v4f32) -> __v4f32; - #[link_name = "llvm.loongarch.lsx.vfrecip.d"] - fn __lsx_vfrecip_d(a: __v2f64) -> __v2f64; #[link_name = "llvm.loongarch.lsx.vfrecipe.s"] fn __lsx_vfrecipe_s(a: __v4f32) -> __v4f32; #[link_name = "llvm.loongarch.lsx.vfrecipe.d"] @@ -1654,20 +1650,6 @@ pub fn lsx_vfclass_d(a: m128d) -> m128i { unsafe { transmute(__lsx_vfclass_d(transmute(a))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vfrecip_s(a: m128) -> m128 { - unsafe { transmute(__lsx_vfrecip_s(transmute(a))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vfrecip_d(a: m128d) -> m128d { - unsafe { transmute(__lsx_vfrecip_d(transmute(a))) } -} - #[inline] #[target_feature(enable = "lsx,frecipe")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index 3cf38b53e0..3e88b4edd1 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -227,6 +227,8 @@ impl_vv!("lsx", lsx_vneg_w, is::simd_neg, m128i, i32x4); impl_vv!("lsx", lsx_vneg_d, is::simd_neg, m128i, i64x2); impl_vv!("lsx", lsx_vfsqrt_s, is::simd_fsqrt, m128, f32x4); impl_vv!("lsx", lsx_vfsqrt_d, is::simd_fsqrt, m128d, f64x2); +impl_vv!("lsx", lsx_vfrecip_s, ls::simd_frecip_s, m128, f32x4); +impl_vv!("lsx", lsx_vfrecip_d, ls::simd_frecip_d, m128d, f64x2); impl_gv!("lsx", lsx_vreplgr2vr_b, ls::simd_splat, m128i, i8x16, i32); impl_gv!("lsx", lsx_vreplgr2vr_h, ls::simd_splat, m128i, i16x8, i32); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index ad443af61c..23e1620f14 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -108,6 +108,18 @@ pub(super) const unsafe fn simd_fnmsub(a: T, b: T, c: T) -> T { is::simd_neg(ls::simd_fmsub(a, b, c)) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_frecip_s(a: T) -> T { + is::simd_div(is::simd_splat(1.0f32), a) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(super) const unsafe fn simd_frecip_d(a: T) -> T { + is::simd_div(is::simd_splat(1.0f64), a) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_ld(a: *const i8) -> T { diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index e822330bdf..78fcc6278a 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -1872,11 +1872,13 @@ asm-fmts = xd, xj data-types = V4DF, V4DF /// lasx_xvfrecip_s +impl = portable name = lasx_xvfrecip_s asm-fmts = xd, xj data-types = V8SF, V8SF /// lasx_xvfrecip_d +impl = portable name = lasx_xvfrecip_d asm-fmts = xd, xj data-types = V4DF, V4DF diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index e319daa1cb..d37a924239 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -1959,11 +1959,13 @@ asm-fmts = vd, vj data-types = V2DF, V2DF /// lsx_vfrecip_s +impl = portable name = lsx_vfrecip_s asm-fmts = vd, vj data-types = V4SF, V4SF /// lsx_vfrecip_d +impl = portable name = lsx_vfrecip_d asm-fmts = vd, vj data-types = V2DF, V2DF diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index db86381aee..6c72b7bdb7 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -275,6 +275,8 @@ lsx_vrotri_b lsx_vrotri_h lsx_vrotri_w lsx_vrotri_d +lsx_vfrecip_s +lsx_vfrecip_d # LASX intrinsics lasx_xvsll_b @@ -551,3 +553,5 @@ lasx_xvrotri_b lasx_xvrotri_h lasx_xvrotri_w lasx_xvrotri_d +lasx_xvfrecip_s +lasx_xvfrecip_d From 30595c75cf93168c2fc55ec1cc835fb40867ca6c Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 2 Jun 2026 17:19:03 +0800 Subject: [PATCH 4/4] loongarch: Use `intrinsics::simd` for vfrsqrt --- .../src/loongarch64/lasx/generated.rs | 18 ------------------ .../core_arch/src/loongarch64/lasx/portable.rs | 2 ++ .../core_arch/src/loongarch64/lsx/generated.rs | 18 ------------------ .../core_arch/src/loongarch64/lsx/portable.rs | 2 ++ crates/core_arch/src/loongarch64/simd.rs | 10 ++++++++++ crates/stdarch-gen-loongarch/lasx.spec | 2 ++ crates/stdarch-gen-loongarch/lsx.spec | 2 ++ .../src/portable-intrinsics.txt | 4 ++++ 8 files changed, 22 insertions(+), 36 deletions(-) diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index 5a4d71207d..52d68d9ca5 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -211,10 +211,6 @@ unsafe extern "unadjusted" { fn __lasx_xvfrint_s(a: __v8f32) -> __v8f32; #[link_name = "llvm.loongarch.lasx.xvfrint.d"] fn __lasx_xvfrint_d(a: __v4f64) -> __v4f64; - #[link_name = "llvm.loongarch.lasx.xvfrsqrt.s"] - fn __lasx_xvfrsqrt_s(a: __v8f32) -> __v8f32; - #[link_name = "llvm.loongarch.lasx.xvfrsqrt.d"] - fn __lasx_xvfrsqrt_d(a: __v4f64) -> __v4f64; #[link_name = "llvm.loongarch.lasx.xvflogb.s"] fn __lasx_xvflogb_s(a: __v8f32) -> __v8f32; #[link_name = "llvm.loongarch.lasx.xvflogb.d"] @@ -1751,20 +1747,6 @@ pub fn lasx_xvfrint_d(a: m256d) -> m256d { unsafe { transmute(__lasx_xvfrint_d(transmute(a))) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvfrsqrt_s(a: m256) -> m256 { - unsafe { transmute(__lasx_xvfrsqrt_s(transmute(a))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvfrsqrt_d(a: m256d) -> m256d { - unsafe { transmute(__lasx_xvfrsqrt_d(transmute(a))) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs index 560c196a2a..35b5522a67 100644 --- a/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -328,6 +328,8 @@ impl_vv!("lasx", lasx_xvneg_w, is::simd_neg, m256i, i32x8); impl_vv!("lasx", lasx_xvneg_d, is::simd_neg, m256i, i64x4); impl_vv!("lasx", lasx_xvfsqrt_s, is::simd_fsqrt, m256, f32x8); impl_vv!("lasx", lasx_xvfsqrt_d, is::simd_fsqrt, m256d, f64x4); +impl_vv!("lasx", lasx_xvfrsqrt_s, ls::simd_frsqrt_s, m256, f32x8); +impl_vv!("lasx", lasx_xvfrsqrt_d, ls::simd_frsqrt_d, m256d, f64x4); impl_vv!("lasx", lasx_xvfrecip_s, ls::simd_frecip_s, m256, f32x8); impl_vv!("lasx", lasx_xvfrecip_d, ls::simd_frecip_d, m256d, f64x4); impl_vv!("lasx", lasx_xvreplve0_b, simd_replve0_b, m256i, i8x32); diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index 8fa39dbdfd..087f1bd673 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -217,10 +217,6 @@ unsafe extern "unadjusted" { fn __lsx_vfrint_s(a: __v4f32) -> __v4f32; #[link_name = "llvm.loongarch.lsx.vfrint.d"] fn __lsx_vfrint_d(a: __v2f64) -> __v2f64; - #[link_name = "llvm.loongarch.lsx.vfrsqrt.s"] - fn __lsx_vfrsqrt_s(a: __v4f32) -> __v4f32; - #[link_name = "llvm.loongarch.lsx.vfrsqrt.d"] - fn __lsx_vfrsqrt_d(a: __v2f64) -> __v2f64; #[link_name = "llvm.loongarch.lsx.vflogb.s"] fn __lsx_vflogb_s(a: __v4f32) -> __v4f32; #[link_name = "llvm.loongarch.lsx.vflogb.d"] @@ -1692,20 +1688,6 @@ pub fn lsx_vfrint_d(a: m128d) -> m128d { unsafe { transmute(__lsx_vfrint_d(transmute(a))) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vfrsqrt_s(a: m128) -> m128 { - unsafe { transmute(__lsx_vfrsqrt_s(transmute(a))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vfrsqrt_d(a: m128d) -> m128d { - unsafe { transmute(__lsx_vfrsqrt_d(transmute(a))) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs index 3e88b4edd1..8e25ef2edd 100644 --- a/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -227,6 +227,8 @@ impl_vv!("lsx", lsx_vneg_w, is::simd_neg, m128i, i32x4); impl_vv!("lsx", lsx_vneg_d, is::simd_neg, m128i, i64x2); impl_vv!("lsx", lsx_vfsqrt_s, is::simd_fsqrt, m128, f32x4); impl_vv!("lsx", lsx_vfsqrt_d, is::simd_fsqrt, m128d, f64x2); +impl_vv!("lsx", lsx_vfrsqrt_s, ls::simd_frsqrt_s, m128, f32x4); +impl_vv!("lsx", lsx_vfrsqrt_d, ls::simd_frsqrt_d, m128d, f64x2); impl_vv!("lsx", lsx_vfrecip_s, ls::simd_frecip_s, m128, f32x4); impl_vv!("lsx", lsx_vfrecip_d, ls::simd_frecip_d, m128d, f64x2); diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs index 23e1620f14..d8d5192f1d 100644 --- a/crates/core_arch/src/loongarch64/simd.rs +++ b/crates/core_arch/src/loongarch64/simd.rs @@ -120,6 +120,16 @@ pub(super) const unsafe fn simd_frecip_d(a: T) -> T { is::simd_div(is::simd_splat(1.0f64), a) } +#[inline(always)] +pub(super) unsafe fn simd_frsqrt_s(a: T) -> T { + ls::simd_frecip_s(is::simd_fsqrt(a)) +} + +#[inline(always)] +pub(super) unsafe fn simd_frsqrt_d(a: T) -> T { + ls::simd_frecip_d(is::simd_fsqrt(a)) +} + #[inline(always)] #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] pub(super) const unsafe fn simd_ld(a: *const i8) -> T { diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index 78fcc6278a..9790560f19 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -1914,11 +1914,13 @@ asm-fmts = xd, xj data-types = V4DF, V4DF /// lasx_xvfrsqrt_s +impl = portable name = lasx_xvfrsqrt_s asm-fmts = xd, xj data-types = V8SF, V8SF /// lasx_xvfrsqrt_d +impl = portable name = lasx_xvfrsqrt_d asm-fmts = xd, xj data-types = V4DF, V4DF diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index d37a924239..1d8a150173 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -2001,11 +2001,13 @@ asm-fmts = vd, vj data-types = V2DF, V2DF /// lsx_vfrsqrt_s +impl = portable name = lsx_vfrsqrt_s asm-fmts = vd, vj data-types = V4SF, V4SF /// lsx_vfrsqrt_d +impl = portable name = lsx_vfrsqrt_d asm-fmts = vd, vj data-types = V2DF, V2DF diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index 6c72b7bdb7..fadd505a19 100644 --- a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -277,6 +277,8 @@ lsx_vrotri_w lsx_vrotri_d lsx_vfrecip_s lsx_vfrecip_d +lsx_vfrsqrt_s +lsx_vfrsqrt_d # LASX intrinsics lasx_xvsll_b @@ -555,3 +557,5 @@ lasx_xvrotri_w lasx_xvrotri_d lasx_xvfrecip_s lasx_xvfrecip_d +lasx_xvfrsqrt_s +lasx_xvfrsqrt_d