Skip to content
34 changes: 34 additions & 0 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,27 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
let pair = self.insert_value(pair, high, 1);
pair
}

// FIXME move into the branch below when LLVM 22 is the lowest version we support.
sym::carryless_mul if crate::llvm_util::get_version() >= (22, 0, 0) => {
let ty = args[0].layout.ty;
if !ty.is_integral() {
tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
span,
name,
ty,
});
return Ok(());
}
let (size, _) = ty.int_size_and_signed(self.tcx);
let width = size.bits();
let llty = self.type_ix(width);

let lhs = args[0].immediate();
let rhs = args[1].immediate();
self.call_intrinsic("llvm.clmul", &[llty], &[lhs, rhs])
}

sym::ctlz
| sym::ctlz_nonzero
| sym::cttz
Expand Down Expand Up @@ -2763,6 +2784,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
| sym::simd_ctlz
| sym::simd_ctpop
| sym::simd_cttz
| sym::simd_carryless_mul
| sym::simd_funnel_shl
| sym::simd_funnel_shr
) {
Expand All @@ -2787,6 +2809,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
sym::simd_cttz => "llvm.cttz",
sym::simd_funnel_shl => "llvm.fshl",
sym::simd_funnel_shr => "llvm.fshr",
sym::simd_carryless_mul => "llvm.clmul",
_ => unreachable!(),
};
let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
Expand All @@ -2812,6 +2835,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
&[vec_ty],
&[args[0].immediate(), args[1].immediate(), args[2].immediate()],
)),
sym::simd_carryless_mul => {
if crate::llvm_util::get_version() >= (22, 0, 0) {
Ok(bx.call_intrinsic(
llvm_intrinsic,
&[vec_ty],
&[args[0].immediate(), args[1].immediate()],
))
} else {
span_bug!(span, "`simd_carryless_mul` needs LLVM 22 or higher");
}
}
_ => unreachable!(),
};
}
Expand Down
9 changes: 8 additions & 1 deletion compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,14 @@ impl CodegenBackend for LlvmCodegenBackend {
}

fn replaced_intrinsics(&self) -> Vec<Symbol> {
vec![sym::unchecked_funnel_shl, sym::unchecked_funnel_shr, sym::carrying_mul_add]
let mut will_not_use_fallback =
vec![sym::unchecked_funnel_shl, sym::unchecked_funnel_shr, sym::carrying_mul_add];

if llvm_util::get_version() >= (22, 0, 0) {
will_not_use_fallback.push(sym::carryless_mul);
}

will_not_use_fallback
}

fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Box<dyn Any> {
Expand Down
27 changes: 27 additions & 0 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,33 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
sym::fmuladdf128 => {
self.float_muladd_intrinsic::<Quad>(args, dest, MulAddType::Nondeterministic)?
}
sym::carryless_mul => {
let size = dest.layout.size;

let left = self.read_scalar(&args[0])?.to_bits(size)?;
let right = self.read_scalar(&args[1])?.to_bits(size)?;

// Perform carry-less multiplication.
//
// This operation is like long multiplication, but it ignores the carries.
// That idea corresponds to the XOR operator, which is used in the implementation.
//
// Wikipedia has a worked example: https://en.wikipedia.org/wiki/Carry-less_product#Example
Comment on lines +742 to +747
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use proper spelling and capitalization.

let mut result: u128 = 0;

for i in 0..size.bits() {
// if the i-th bit in right is set
if (right >> i) & 1 != 0 {
// xor result with `left` shifted to the left by i positions
result ^= left << i;
}
}
Comment on lines +750 to +756
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please explain why it is okay to run this algorithm on u128 no matter the actual type. In particular, couldn't the underlying type be signed...?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is no signed carryless-mul, if it exists on a signed type, it always returns the same results as the corresponding unsigned version (the definition using shifting assumes unsigned types where that matters).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems worth an assertion in the code.


// Only return the lower bits.
result &= u128::MAX >> (128 - size.bits());
Comment on lines +758 to +759
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.ScalarInt.html#method.truncate_from_uint to get a ScalarInt with implicit truncation which you can then turn into a Scalar.


self.write_scalar(Scalar::from_uint(result, dest.layout.size), dest)?;
}

// Unsupported intrinsic: skip the return_to_block below.
_ => return interp_ok(false),
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
| sym::bswap
| sym::caller_location
| sym::carrying_mul_add
| sym::carryless_mul
| sym::ceilf16
| sym::ceilf32
| sym::ceilf64
Expand Down Expand Up @@ -564,6 +565,7 @@ pub(crate) fn check_intrinsic_type(
(1, 0, vec![param(0), param(0)], param(0))
}
sym::saturating_add | sym::saturating_sub => (1, 0, vec![param(0), param(0)], param(0)),
sym::carryless_mul => (1, 0, vec![param(0), param(0)], param(0)),
sym::fadd_fast | sym::fsub_fast | sym::fmul_fast | sym::fdiv_fast | sym::frem_fast => {
(1, 0, vec![param(0), param(0)], param(0))
}
Expand Down Expand Up @@ -711,7 +713,8 @@ pub(crate) fn check_intrinsic_type(
| sym::simd_fmin
| sym::simd_fmax
| sym::simd_saturating_add
| sym::simd_saturating_sub => (1, 0, vec![param(0), param(0)], param(0)),
| sym::simd_saturating_sub
| sym::simd_carryless_mul => (1, 0, vec![param(0), param(0)], param(0)),
sym::simd_arith_offset => (2, 0, vec![param(0), param(1)], param(0)),
sym::simd_neg
| sym::simd_bswap
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ symbols! {
caller_location,
capture_disjoint_fields,
carrying_mul_add,
carryless_mul,
catch_unwind,
cause,
cdylib,
Expand Down Expand Up @@ -2083,6 +2084,7 @@ symbols! {
simd_bitmask,
simd_bitreverse,
simd_bswap,
simd_carryless_mul,
simd_cast,
simd_cast_ptr,
simd_ceil,
Expand Down
98 changes: 98 additions & 0 deletions library/core/src/intrinsics/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,101 @@ macro_rules! impl_funnel_shifts {
// Instantiate the funnel-shift fallbacks for every unsigned integer primitive.
impl_funnel_shifts! { u8, u16, u32, u64, u128, usize }

/// Fallback support trait for [`super::carryless_mul`].
///
/// Implemented for the unsigned integer primitives; each impl provides a
/// software carry-less multiply for backends (or backend versions) that
/// do not lower the intrinsic natively.
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
pub const trait CarrylessMul: Copy + 'static {
    /// See [`super::carryless_mul`]; we just need the trait indirection to handle
    /// different types since calling intrinsics with generics doesn't work.
    fn carryless_mul(self, rhs: Self) -> Self;
}

macro_rules! impl_carryless_mul{
($($type:ident),*) => {$(
/// This approach uses a bitmask of the form `0b100010001...0001` to avoid carry spilling.
/// When carries do occur, they wind up in a "hole" of zeros and are subsequently masked
/// out of the result.
Comment on lines +231 to +233
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach with 4-bit digits works up to integers with 4 * 15 = 60 bits. Past that, one digit can overflow to the next.

For u64, it does actually work for this "non-widening" operation, since the top digit may be computed as 16, but there is no next digit that would be affected. The wide result would be erroneous however. E.g. x.carryless_mul(x) with x = MASK as u64 as u128.

The impl for u128::carryless_mul is currently incorrect for that reason. You could probably extend the approach to use 5-bit digits, but it's likely better to just implement it in terms of u64::carryless_mul.

some tests against a naive impl: playground

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a const { assert!(std::mem::size_of::<Self>() <= 8); } here or so to make it less likely we accidentally merge this incorrectly in the future?

#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
impl const CarrylessMul for $type {
#[inline]
fn carryless_mul(self, rhs: Self) -> Self {
use crate::num::Wrapping;

// i.e. 0b100010001...0001 in binary.
const MASK: u64 = 0x1111_1111_1111_1111u64;

const M0: $type = MASK as $type;
const M1: $type = M0 << 1;
const M2: $type = M1 << 1;
const M3: $type = M2 << 1;

let x = self;
let y = rhs;

let x0 = Wrapping(x & M0);
let x1 = Wrapping(x & M1);
let x2 = Wrapping(x & M2);
let x3 = Wrapping(x & M3);

let y0 = Wrapping(y & M0);
let y1 = Wrapping(y & M1);
let y2 = Wrapping(y & M2);
let y3 = Wrapping(y & M3);

let z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
let z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
let z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
let z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);

(z0.0 & M0) | (z1.0 & M1) | (z2.0 & M2) | (z3.0 & M3)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that Wikipedia and the relevant C++ paper (https://isocpp.org/files/papers/P3642R3.html) have much simpler implementations. We also provide a simpler implementation as the 'simple implementation' in the description of the public function (I think the same one C++ paper provides). I think this manual unrolling may help performance, but do we have evidence of that? Does that performance matter since in practice distros ~only ship stable toolchains and Rust will not stabilize this on LLVM < 22? Maybe we should use the simple implementation as fallback too?

Copy link
Contributor

@tarcieri tarcieri Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm reading the implementation from that C++ paper correctly, it's doing one integer multiply for every bit of the input, whereas this does 16 multiply ops. For a u64, that should be 1/4 the multiplies. It's effectively batching up the work so it can do more with each multiply.

The C++ example really seems more like a naive/idealized algorithm description designed to lower to optimized intrinsics as opposed to something you'd actually want to practically deploy as a portable implementation.

Note: the original LLVM RFC originally proposed using the above method "If the CPU does not have a dedication clmul operation, it can be lowered to regular multiplication, by using holes to avoid carrys" but it sounds like the actual portable codegen out of LLVM isn't using it (yet)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's doing one integer multiply for every bit of the input

Technically yes, but note that each of those multiplies is of the form x * (1 << i), which is just x << i (where i is constant if unrolled), so the comparison isn't so clear.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I understand that this is batching that work. But given that this code is unreachable except on gcc + cranelift in Rust-distributed builds, it's not obvious to me how much that matters. I'd also maybe expect that llvm can optimize the naive form a bit closer to this? Not sure there.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's reachable on builds with older llvm

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but it seems unlikely this will stabilize before those are largely phased out of our support? In any case, I think the main thing is adding more thorough test coverage.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we care enough about older LLVM to add non-trivial fast-paths for those versions?

}
}
)*};
}

// Instantiate the masked-multiply fallback for types of at most 64 bits;
// u128 gets a dedicated impl below.
impl_carryless_mul! { u8, u16, u32, u64, usize }

#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
impl const CarrylessMul for u128 {
    /// Truncating 128-bit carry-less multiply assembled from 64-bit pieces.
    ///
    /// Writing `self = a_hi:a_lo` and `rhs = b_hi:b_lo`, the low half of the
    /// product is `clmul(a_lo, b_lo)` and the high half is
    /// `clmul(a_lo, b_hi) ^ clmul(a_hi, b_lo) ^ clmul_high(a_lo, b_lo)`.
    /// The `a_hi * b_hi` term lies entirely above bit 127 and is discarded.
    #[inline]
    fn carryless_mul(self, rhs: Self) -> Self {
        let a_lo = self as u64;
        let a_hi = (self >> 64) as u64;
        let b_lo = rhs as u64;
        let b_hi = (rhs >> 64) as u64;

        let low = u64::carryless_mul(a_lo, b_lo);
        let cross = u64::carryless_mul(a_lo, b_hi) ^ u64::carryless_mul(a_hi, b_lo);
        let high = cross ^ carryless_mul_high(a_lo, b_lo);

        ((high as u128) << 64) | (low as u128)
    }
}

/// Computes the upper 64 bits of the 128-bit carry-less product of `x` and `y`.
///
/// Strategy: each operand is split into four interleaved "digit" streams via a
/// one-bit-per-nibble mask. Within a masked operand every nibble holds at most
/// one set bit, so an ordinary widening multiply accumulates per-nibble overlap
/// counts without carries crossing nibble boundaries — provided each count stays
/// below 16. Masking the product back down then extracts the low bit of each
/// nibble, i.e. the count's parity, which is exactly the XOR (carry-less) sum.
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
#[inline]
const fn carryless_mul_high(x: u64, y: u64) -> u64 {
    // i.e. 0b100010001...0001 in binary.
    const MASK: u64 = 0x1111_1111_1111_1111u64;

    const M0: u64 = MASK;
    const M1: u64 = M0 << 1;
    const M2: u64 = M1 << 1;
    const M3: u64 = M2 << 1;

    // Masked widening multiply of the `$x_mask_shift`-th digit stream of `x`
    // with the `$y_mask_shift`-th digit stream of `y`, keeping the high word.
    macro_rules! mul {
        ($x_mask_shift:literal, $y_mask_shift:literal) => {{
            let x = x & (MASK << $x_mask_shift);
            let y = y & (MASK << $y_mask_shift);
            // A per-nibble count can only reach 16 (and thus carry into the next
            // nibble) when every nibble of both masked operands is populated,
            // i.e. when both equal their full mask; that one case is special-cased
            // with the precomputed correct high word.
            crate::hint::select_unpredictable(
                x == MASK << $x_mask_shift && y == MASK << $y_mask_shift,
                // only case where the multiply overflows the 4-bit parts
                0x0101_0101_0101_0101u64 << ($x_mask_shift + $y_mask_shift),
                x.carrying_mul(y, 0).1,
            )
        }};
    }

    // Stream `k` of the result collects the (i, j) shift pairs with
    // (i + j) mod 4 == k; XOR-combining them reassembles the carry-less sum.
    let z0 = mul!(0, 0) ^ mul!(1, 3) ^ mul!(2, 2) ^ mul!(3, 1);
    let z1 = mul!(0, 1) ^ mul!(1, 0) ^ mul!(2, 3) ^ mul!(3, 2);
    let z2 = mul!(0, 2) ^ mul!(1, 1) ^ mul!(2, 0) ^ mul!(3, 3);
    let z3 = mul!(0, 3) ^ mul!(1, 2) ^ mul!(2, 1) ^ mul!(3, 0);

    (z0 & M0) | (z1 & M1) | (z2 & M2) | (z3 & M3)
}
14 changes: 14 additions & 0 deletions library/core/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2179,6 +2179,20 @@ pub const unsafe fn unchecked_funnel_shr<T: [const] fallback::FunnelShift>(
unsafe { a.unchecked_funnel_shr(b, shift) }
}

/// Computes the carry-less product of `a` and `b`: like long multiplication,
/// but every carry is discarded. Equivalently, this is multiplication in
/// `GF(2)[X]`, the polynomial ring over `GF(2)`, truncated to the width of `T`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this meant to be a self-contained description? To me these words mean nothing.^^

///
/// Safe versions of this intrinsic are available on the integer primitives
/// via the `carryless_mul` method. For example, [`u32::carryless_mul`].
#[rustc_intrinsic]
#[rustc_nounwind]
#[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
#[unstable(feature = "uint_carryless_mul", issue = "152080")]
pub const fn carryless_mul<T: [const] fallback::CarrylessMul>(a: T, b: T) -> T {
// NOTE: while this implementation could serve as the specification, rustc_const_eval
// actually implements a simpler but less efficient variant as the specification.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? As I ask above, maybe our fallback could be the simple spec?

a.carryless_mul(b)
}

/// This is an implementation detail of [`crate::ptr::read`] and should
/// not be used anywhere else. See its comments for why this exists.
///
Expand Down
12 changes: 12 additions & 0 deletions library/core/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,18 @@ pub const unsafe fn simd_funnel_shl<T>(a: T, b: T, shift: T) -> T;
#[rustc_nounwind]
pub const unsafe fn simd_funnel_shr<T>(a: T, b: T, shift: T) -> T;

/// Compute the carry-less product.
///
/// This is similar to long multiplication except that the carry is discarded.
///
/// Each lane of the result is the truncated carry-less product of the
/// corresponding lanes of `a` and `b`.
///
/// This operation can be used to model multiplication in `GF(2)[X]`, the polynomial
/// ring over `GF(2)`.
///
/// `T` must be a vector of integers.
// NOTE(review): no `# Safety` section yet — confirm whether an invalid `T` is a
// monomorphization-time error (as for the sibling simd intrinsics) or UB, and
// document accordingly.
#[rustc_intrinsic]
#[rustc_nounwind]
pub unsafe fn simd_carryless_mul<T>(a: T, b: T) -> T;

/// "And"s vectors elementwise.
///
/// `T` must be a vector of integers.
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@
#![feature(trait_alias)]
#![feature(transparent_unions)]
#![feature(try_blocks)]
#![feature(uint_carryless_mul)]
#![feature(unboxed_closures)]
#![feature(unsized_fn_params)]
#![feature(with_negative_coherence)]
Expand Down
Loading
Loading