From 8a05f10c9321602920ceeb1c19103aef51a831ff Mon Sep 17 00:00:00 2001
From: kunitoki <kunitoki@gmail.com>
Date: Fri, 29 May 2026 10:17:59 +0200
Subject: [PATCH 1/5] Initial checkin of yup_simd

---
 cmake/platforms/yup_emscripten.cmake          |   2 +
 .../convolution/yup_PartitionedConvolver.cpp  | 105 +----
 .../yup_dsp/metering/yup_LevelProcessor.cpp   |  18 +-
 modules/yup_dsp/utilities/yup_DspMath.cpp     |  73 +---
 modules/yup_graphics/graphics/yup_Color.h     |  22 +
 modules/yup_graphics/imaging/yup_Image.cpp    |  41 +-
 .../primitives/yup_AffineTransform.h          |   6 +
 modules/yup_graphics/yup_graphics.h           |   3 +-
 .../buffers/yup_AffineTransformOperations.cpp |  63 +++
 .../buffers/yup_AffineTransformOperations.h   |  38 ++
 .../buffers/yup_ColorVectorOperations.cpp     | 113 +++++
 .../buffers/yup_ColorVectorOperations.h       |  53 +++
 .../buffers/yup_ComplexVectorOperations.cpp   | 172 ++++++++
 .../buffers/yup_ComplexVectorOperations.h     |  62 +++
 .../buffers/yup_FloatVectorOperations.cpp     | 179 ++++++++
 .../buffers/yup_FloatVectorOperations.h       |  18 +-
 modules/yup_simd/types/yup_EigenAdapters.h    |  43 ++
 modules/yup_simd/types/yup_SIMDRegister.h     | 390 ++++++++++++++++++
 modules/yup_simd/types/yup_Vec.h              | 137 ++++++
 modules/yup_simd/yup_simd.cpp                 |   3 +
 modules/yup_simd/yup_simd.h                   |  22 +-
 tests/yup_graphics/yup_Image.cpp              |  54 +++
 tests/yup_simd.cpp                            |   4 +
 .../yup_AffineTransformOperations.cpp         |  50 +++
 tests/yup_simd/yup_ColorVectorOperations.cpp  | 128 ++++++
 .../yup_simd/yup_ComplexVectorOperations.cpp  |  93 +++++
 tests/yup_simd/yup_FloatVectorOperations.cpp  |  49 +++
 tests/yup_simd/yup_SIMDRegister.cpp           |  59 +++
 thirdparty/xsimd/xsimd.h                      |  45 ++
 29 files changed, 1827 insertions(+), 218 deletions(-)
 create mode 100644 modules/yup_simd/buffers/yup_AffineTransformOperations.cpp
 create mode 100644 modules/yup_simd/buffers/yup_AffineTransformOperations.h
 create mode 100644 modules/yup_simd/buffers/yup_ColorVectorOperations.cpp
 create mode 100644 modules/yup_simd/buffers/yup_ColorVectorOperations.h
 create mode 100644 modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp
 create mode 100644 modules/yup_simd/buffers/yup_ComplexVectorOperations.h
 create mode 100644 modules/yup_simd/types/yup_EigenAdapters.h
 create mode 100644 modules/yup_simd/types/yup_SIMDRegister.h
 create mode 100644 modules/yup_simd/types/yup_Vec.h
 create mode 100644 tests/yup_simd/yup_AffineTransformOperations.cpp
 create mode 100644 tests/yup_simd/yup_ColorVectorOperations.cpp
 create mode 100644 tests/yup_simd/yup_ComplexVectorOperations.cpp
 create mode 100644 tests/yup_simd/yup_SIMDRegister.cpp
 create mode 100644 thirdparty/xsimd/xsimd.h
diff --git a/cmake/platforms/yup_emscripten.cmake b/cmake/platforms/yup_emscripten.cmake
index 4710c7eca..c1f36d3d7 100644
--- a/cmake/platforms/yup_emscripten.cmake
+++ b/cmake/platforms/yup_emscripten.cmake
@@ -16,3 +16,5 @@
 #   DISCLAIMED.
 #
 # ==============================================================================
+
+add_compile_options (-msimd128)
diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp
index ac027993e..7e1a0a47b 100644
--- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp
+++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp
@@ -22,109 +22,6 @@
 namespace yup
 {
 
-//==============================================================================
-
-/** Performs Y += A * B (complex multiply accumulate) where A, B, and Y
-    are arrays of interleaved complex<float> values [real, imag, real, imag...].
-
-    @param A pointer to input complex array
-    @param B pointer to input complex array
-    @param Y pointer to output complex array (accumulated)
-    @param complexPairs number of complex pairs (not number of floats!)
-*/
-static void complexMultiplyAccumulate (const float* __restrict A, const float* __restrict B, float* __restrict Y, int complexPairs) noexcept
-{
-    int i = 0;
-
-#if YUP_USE_AVX_INTRINSICS
-    constexpr int simdWidth = 4; // AVX2 path: process 4 complex pairs (8 floats) at a time
-    for (; i <= complexPairs - simdWidth; i += simdWidth)
-    {
-        const int idx = i * 2;
-
-        __m256 a = _mm256_loadu_ps (A + idx);
-        __m256 b = _mm256_loadu_ps (B + idx);
-        __m256 y = _mm256_loadu_ps (Y + idx);
-
-        const __m256 a_shuffled = _mm256_permute_ps (a, _MM_SHUFFLE (2, 3, 0, 1));
-        const __m256 b_shuffled = _mm256_permute_ps (b, _MM_SHUFFLE (2, 3, 0, 1));
-
-        __m256 realPart = _mm256_fmsub_ps (a, b, _mm256_mul_ps (a_shuffled, b_shuffled));
-        __m256 imagPart = _mm256_fmadd_ps (a, b_shuffled, _mm256_mul_ps (a_shuffled, b));
-
-        const __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010);
-
-        y = _mm256_add_ps (y, interleaved);
-        _mm256_storeu_ps (Y + idx, y);
-    }
-
-#elif YUP_USE_SSE_INTRINSICS
-    constexpr int simdWidth = 2; // SSE path: process 2 complex pairs (4 floats) at a time
-    for (; i <= complexPairs - simdWidth; i += simdWidth)
-    {
-        const int idx = i * 2;
-
-        __m128 a = _mm_loadu_ps (A + idx);
-        __m128 b = _mm_loadu_ps (B + idx);
-        __m128 y = _mm_loadu_ps (Y + idx);
-
-        const __m128 a_shuffled = _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1));
-        const __m128 b_shuffled = _mm_shuffle_ps (b, b, _MM_SHUFFLE (2, 3, 0, 1));
-
-        __m128 realPart = _mm_sub_ps (_mm_mul_ps (a, b), _mm_mul_ps (a_shuffled, b_shuffled));
-        __m128 imagPart = _mm_add_ps (_mm_mul_ps (a, b_shuffled), _mm_mul_ps (a_shuffled, b));
-
-        const __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart);
-
-        y = _mm_add_ps (y, interleaved);
-        _mm_storeu_ps (Y + idx, y);
-    }
-
-#elif YUP_USE_ARM_NEON
-    constexpr int simdWidth = 4;
-    for (; i <= complexPairs - simdWidth; i += simdWidth)
-    {
-        const int idx = i * 2;
-
-        float32x4x2_t a = vld2q_f32 (A + idx);
-        float32x4x2_t b = vld2q_f32 (B + idx);
-        float32x4x2_t y = vld2q_f32 (Y + idx);
-
-        float32x4_t ar = a.val[0], ai = a.val[1];
-        float32x4_t br = b.val[0], bi = b.val[1];
-        float32x4_t yr = y.val[0], yi = y.val[1];
-
-        float32x4_t real = vmulq_f32 (ar, br);
-        real = vfmsq_f32 (real, ai, bi);
-        float32x4_t imag = vmulq_f32 (ar, bi);
-        imag = vfmaq_f32 (imag, ai, br);
-
-        yr = vaddq_f32 (yr, real);
-        yi = vaddq_f32 (yi, imag);
-
-        float32x4x2_t out = { yr, yi };
-        vst2q_f32 (Y + idx, out); // interleave back
-    }
-
-#endif
-
-    for (; i < complexPairs; ++i)
-    {
-        const int ri = i * 2;
-        const int ii = ri + 1;
-
-        const float ar = A[ri];
-        const float ai = A[ii];
-        const float br = B[ri];
-        const float bi = B[ii];
-
-        Y[ri] += ar * br - ai * bi;
-        Y[ii] += ar * bi + ai * br;
-    }
-}
-
-//==============================================================================
-
 class PartitionedConvolver::FFTLayer
 {
 public:
@@ -254,7 +151,7 @@ class PartitionedConvolver::FFTLayer
             const float* H = frequencyPartitions[p].data();
 
             // fftSize_/2 gives the number of complex pairs for real FFT
-            complexMultiplyAccumulate (X, H, frequencyBuffer.data(), fftSize / 2);
+            ComplexVectorOperations::multiplyAccumulate (X, H, frequencyBuffer.data(), fftSize / 2);
 
             // Move to next older spectrum
             xIndex++;
diff --git a/modules/yup_dsp/metering/yup_LevelProcessor.cpp b/modules/yup_dsp/metering/yup_LevelProcessor.cpp
index fca19e17e..f83954102 100644
--- a/modules/yup_dsp/metering/yup_LevelProcessor.cpp
+++ b/modules/yup_dsp/metering/yup_LevelProcessor.cpp
@@ -73,16 +73,7 @@ void LevelProcessor::processPeak (const float* samples, int numSamples, float& p
     jassert (samples != nullptr);
     jassert (numSamples >= 0);
 
-    float peak = 0.0f;
-
-    for (int i = 0; i < numSamples; ++i)
-    {
-        const float absSample = std::abs (samples[i]);
-        if (absSample > peak)
-            peak = absSample;
-    }
-
-    peakOut = peak;
+    peakOut = FloatVectorOperations::maxAbs (samples, numSamples);
 }
 
 void LevelProcessor::processRMS (const float* samples, int numSamples, float& rmsOut) noexcept
@@ -93,12 +84,7 @@ void LevelProcessor::processRMS (const float* samples, int numSamples, float& rm
     if (rmsBuffer.empty() || rmsBufferSize == 0)
     {
         // Fallback: simple RMS if buffer not initialized
-        double sumSquares = 0.0;
-        for (int i = 0; i < numSamples; ++i)
-        {
-            const float sample = samples[i];
-            sumSquares += sample * sample;
-        }
+        const double sumSquares = FloatVectorOperations::sumSquares (samples, numSamples);
         rmsOut = numSamples > 0 ? static_cast<float> (std::sqrt (sumSquares / numSamples)) : 0.0f;
         return;
     }
diff --git a/modules/yup_dsp/utilities/yup_DspMath.cpp b/modules/yup_dsp/utilities/yup_DspMath.cpp
index b14bade41..8b31c31c3 100644
--- a/modules/yup_dsp/utilities/yup_DspMath.cpp
+++ b/modules/yup_dsp/utilities/yup_DspMath.cpp
@@ -27,78 +27,7 @@ namespace yup
 template <>
 float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t length) noexcept
 {
-    float accumulation = 0.0f;
-
-#if YUP_ENABLE_VDSP
-    vDSP_dotpr (a, 1, b, 1, &accumulation, length);
-
-#else
-    std::size_t i = 0;
-
-#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS
-    __m256 vacc = _mm256_setzero_ps();
-    for (; i + 8 <= length; i += 8)
-    {
-        __m256 va = _mm256_loadu_ps (a + i);
-        __m256 vb = _mm256_loadu_ps (b + i);
-        vacc = _mm256_fmadd_ps (va, vb, vacc);
-    }
-    __m128 low = _mm256_castps256_ps128 (vacc);
-    __m128 high = _mm256_extractf128_ps (vacc, 1);
-    __m128 vsum = _mm_add_ps (low, high);
-    vsum = _mm_hadd_ps (vsum, vsum);
-    vsum = _mm_hadd_ps (vsum, vsum);
-    accumulation += _mm_cvtss_f32 (vsum);
-
-#elif YUP_USE_SSE_INTRINSICS
-    __m128 vacc = _mm_setzero_ps();
-#if YUP_USE_FMA_INTRINSICS
-    for (; i + 4 <= length; i += 4)
-    {
-        __m128 va = _mm_loadu_ps (a + i);
-        __m128 vb = _mm_loadu_ps (b + i);
-        vacc = _mm_fmadd_ps (va, vb, vacc);
-    }
-#else
-    for (; i + 4 <= length; i += 4)
-    {
-        __m128 va = _mm_loadu_ps (a + i);
-        __m128 vb = _mm_loadu_ps (b + i);
-        vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb));
-    }
-#endif
-    __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1));
-    __m128 sums = _mm_add_ps (vacc, shuf);
-    shuf = _mm_movehl_ps (shuf, sums);
-    sums = _mm_add_ss (sums, shuf);
-    accumulation += _mm_cvtss_f32 (sums);
-
-#elif YUP_USE_ARM_NEON
-    float32x4_t vacc = vdupq_n_f32 (0.0f);
-    for (; i + 4 <= length; i += 4)
-    {
-        float32x4_t va = vld1q_f32 (a + i);
-        float32x4_t vb = vld1q_f32 (b + i);
-        vacc = vmlaq_f32 (vacc, va, vb);
-    }
-#if YUP_64BIT
-    accumulation += vaddvq_f32 (vacc);
-#else
-    float32x2_t vlow = vget_low_f32 (vacc);
-    float32x2_t vhigh = vget_high_f32 (vacc);
-    float32x2_t vsum2 = vpadd_f32 (vlow, vhigh);
-    vsum2 = vpadd_f32 (vsum2, vsum2);
-    accumulation += vget_lane_f32 (vsum2, 0);
-#endif
-
-#endif
-
-    // Handle remaining samples
-    for (; i < length; ++i)
-        accumulation += a[i] * b[i];
-#endif
-
-    return accumulation;
+    return FloatVectorOperations::dotProduct (a, b, length);
 }
 
 } // namespace yup
diff --git a/modules/yup_graphics/graphics/yup_Color.h b/modules/yup_graphics/graphics/yup_Color.h
index fe6b8abe5..3484d144f 100644
--- a/modules/yup_graphics/graphics/yup_Color.h
+++ b/modules/yup_graphics/graphics/yup_Color.h
@@ -1176,4 +1176,26 @@ class YUP_API Color
     };
 };
 
+/** Converts a Color to a normalized RGBA vector. */
+inline Vec4f toVec4f (Color color) noexcept
+{
+    return {
+        color.getRedFloat(),
+        color.getGreenFloat(),
+        color.getBlueFloat(),
+        color.getAlphaFloat()
+    };
+}
+
+/** Converts a normalized RGBA vector to a Color. */
+inline Color toColor (Vec4f color) noexcept
+{
+    return {
+        static_cast<uint8> (roundToInt (jlimit (0.0f, 1.0f, color.a) * 255.0f)),
+        static_cast<uint8> (roundToInt (jlimit (0.0f, 1.0f, color.r) * 255.0f)),
+        static_cast<uint8> (roundToInt (jlimit (0.0f, 1.0f, color.g) * 255.0f)),
+        static_cast<uint8> (roundToInt (jlimit (0.0f, 1.0f, color.b) * 255.0f))
+    };
+}
+
 } // namespace yup
diff --git a/modules/yup_graphics/imaging/yup_Image.cpp b/modules/yup_graphics/imaging/yup_Image.cpp
index f96475cd2..abdcc623e 100644
--- a/modules/yup_graphics/imaging/yup_Image.cpp
+++ b/modules/yup_graphics/imaging/yup_Image.cpp
@@ -24,11 +24,6 @@ namespace yup
 
 namespace
 {
-uint8 premultiplyComponent (uint8 component, uint8 alpha) noexcept
-{
-    return static_cast<uint8> ((static_cast<uint32> (component) * static_cast<uint32> (alpha) + 127u) / 255u);
-}
-
 uint8 unpremultiplyComponent (uint8 component, uint8 alpha) noexcept
 {
     if (alpha == 0)
@@ -38,16 +33,6 @@ uint8 unpremultiplyComponent (uint8 component, uint8 alpha) noexcept
     return static_cast<uint8> (value > 255u ? 255u : value);
 }
 
-void writeARGBAsRGBAPremultiplied (uint32 argb, uint8* destination) noexcept
-{
-    const auto alpha = static_cast<uint8> ((argb >> 24) & 0xFF);
-
-    destination[0] = premultiplyComponent (static_cast<uint8> ((argb >> 16) & 0xFF), alpha);
-    destination[1] = premultiplyComponent (static_cast<uint8> ((argb >> 8) & 0xFF), alpha);
-    destination[2] = premultiplyComponent (static_cast<uint8> (argb & 0xFF), alpha);
-    destination[3] = alpha;
-}
-
 std::unique_ptr<const uint8[]> copyRGBAPremultipliedAsRGBA (const rive::Bitmap& bitmap)
 {
     const auto numPixels = static_cast<size_t> (bitmap.width()) * static_cast<size_t> (bitmap.height());
@@ -256,16 +241,26 @@ bool Image::createTextureIfNotPresent (GraphicsContext& context) const
     if (renderContext == nullptr || renderContext->impl() == nullptr)
         return false;
 
-    std::vector<uint8> texturePixels (static_cast<size_t> (width) * static_cast<size_t> (height) * 4u);
-    auto* destination = texturePixels.data();
+    const auto numPixels = static_cast<size_t> (width) * static_cast<size_t> (height);
+    std::vector<uint8> texturePixels (numPixels * 4u);
+
+    const auto sourceData = bitmapData->getRawData();
+    const auto* source = sourceData.data();
 
-    for (int y = 0; y < height; ++y)
+    switch (bitmapData->getPixelFormat())
     {
-        for (int x = 0; x < width; ++x)
-        {
-            writeARGBAsRGBAPremultiplied (bitmapData->getPixel (x, y), destination);
-            destination += 4;
-        }
+        case PixelFormat::Grayscale:
+            ColorVectorOperations::convertGrayscaleToRGBA (source, texturePixels.data(), static_cast<int> (numPixels));
+            break;
+
+        case PixelFormat::RGB:
+            ColorVectorOperations::convertRGBToRGBA (source, texturePixels.data(), static_cast<int> (numPixels));
+            break;
+
+        case PixelFormat::RGBA:
+            std::memcpy (texturePixels.data(), source, texturePixels.size());
+            ColorVectorOperations::premultiplyRGBA (texturePixels.data(), static_cast<int> (numPixels));
+            break;
     }
 
     texture = renderContext->impl()->makeImageTexture (
diff --git a/modules/yup_graphics/primitives/yup_AffineTransform.h b/modules/yup_graphics/primitives/yup_AffineTransform.h
index 96625424a..ca264255b 100644
--- a/modules/yup_graphics/primitives/yup_AffineTransform.h
+++ b/modules/yup_graphics/primitives/yup_AffineTransform.h
@@ -949,6 +949,12 @@ constexpr float get (const AffineTransform& transform) noexcept
         static_assert (dependentFalse<I>);
 }
 
+/** Transforms separate x/y point arrays in-place using an AffineTransform. */
+inline void batchTransformPoints (float* xs, float* ys, int numPoints, const AffineTransform& transform) noexcept
+{
+    AffineTransformOperations::transformPoints (xs, ys, numPoints, transform.getScaleX(), transform.getShearX(), transform.getTranslateX(), transform.getShearY(), transform.getScaleY(), transform.getTranslateY());
+}
+
 } // namespace yup
 
 #ifndef DOXYGEN
diff --git a/modules/yup_graphics/yup_graphics.h b/modules/yup_graphics/yup_graphics.h
index fea89c81a..089d37798 100644
--- a/modules/yup_graphics/yup_graphics.h
+++ b/modules/yup_graphics/yup_graphics.h
@@ -32,7 +32,7 @@
     website:            https://github.com/kunitoki/yup
     license:            ISC
 
-    dependencies:       yup_core rive rive_renderer
+    dependencies:       yup_core yup_simd rive rive_renderer
     appleFrameworks:    Metal
     searchpaths:        native
 
@@ -45,6 +45,7 @@
 #define YUP_GRAPHICS_H_INCLUDED
 
 #include <yup_core/yup_core.h>
+#include <yup_simd/yup_simd.h>
 
 #include <rive_renderer/rive_renderer.h>
 #include <rive_decoders/rive_decoders.h>
diff --git a/modules/yup_simd/buffers/yup_AffineTransformOperations.cpp b/modules/yup_simd/buffers/yup_AffineTransformOperations.cpp
new file mode 100644
index 000000000..6d88aa991
--- /dev/null
+++ b/modules/yup_simd/buffers/yup_AffineTransformOperations.cpp
@@ -0,0 +1,63 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace yup
+{
+
+void YUP_CALLTYPE AffineTransformOperations::transformPoints (float* xs, float* ys, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept
+{
+    transformPoints (xs, ys, xs, ys, numPoints, sx, shx, tx, shy, sy, ty);
+}
+
+void YUP_CALLTYPE AffineTransformOperations::transformPoints (const float* srcXs, const float* srcYs, float* dstXs, float* dstYs, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept
+{
+    int i = 0;
+
+    const auto sx4 = Float4::broadcast (sx);
+    const auto shx4 = Float4::broadcast (shx);
+    const auto tx4 = Float4::broadcast (tx);
+    const auto shy4 = Float4::broadcast (shy);
+    const auto sy4 = Float4::broadcast (sy);
+    const auto ty4 = Float4::broadcast (ty);
+
+    for (; i + Float4::size <= numPoints; i += Float4::size)
+    {
+        const auto x = Float4::loadUnaligned (srcXs + i);
+        const auto y = Float4::loadUnaligned (srcYs + i);
+
+        const auto outX = tx4.mulAdd (sx4, x).mulAdd (shx4, y);
+        const auto outY = ty4.mulAdd (shy4, x).mulAdd (sy4, y);
+
+        outX.storeUnaligned (dstXs + i);
+        outY.storeUnaligned (dstYs + i);
+    }
+
+    for (; i < numPoints; ++i)
+    {
+        const float x = srcXs[i];
+        const float y = srcYs[i];
+
+        dstXs[i] = sx * x + shx * y + tx;
+        dstYs[i] = shy * x + sy * y + ty;
+    }
+}
+
+} // namespace yup
diff --git a/modules/yup_simd/buffers/yup_AffineTransformOperations.h b/modules/yup_simd/buffers/yup_AffineTransformOperations.h
new file mode 100644
index 000000000..ae68cb2ee
--- /dev/null
+++ b/modules/yup_simd/buffers/yup_AffineTransformOperations.h
@@ -0,0 +1,38 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+class YUP_API AffineTransformOperations
+{
+public:
+    /** Transforms points stored in separate x/y arrays in-place. */
+    static void YUP_CALLTYPE transformPoints (float* xs, float* ys, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept;
+
+    /** Transforms points stored in separate x/y source arrays into destination arrays. */
+    static void YUP_CALLTYPE transformPoints (const float* srcXs, const float* srcYs, float* dstXs, float* dstYs, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept;
+};
+
+} // namespace yup
diff --git a/modules/yup_simd/buffers/yup_ColorVectorOperations.cpp b/modules/yup_simd/buffers/yup_ColorVectorOperations.cpp
new file mode 100644
index 000000000..dbde342f1
--- /dev/null
+++ b/modules/yup_simd/buffers/yup_ColorVectorOperations.cpp
@@ -0,0 +1,113 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace yup
+{
+
+namespace
+{
+static uint32 premultiplyComponent (uint32 component, uint32 alpha) noexcept
+{
+    return (component * alpha + 127u) / 255u;
+}
+
+static uint32 packRGBA (uint32 red, uint32 green, uint32 blue, uint32 alpha) noexcept
+{
+    return red | (green << 8) | (blue << 16) | (alpha << 24);
+}
+} // namespace
+
+//==============================================================================
+void YUP_CALLTYPE ColorVectorOperations::premultiplyARGB (uint32* pixels, int numPixels) noexcept
+{
+    for (int i = 0; i < numPixels; ++i)
+    {
+        const uint32 pixel = pixels[i];
+        const uint32 alpha = (pixel >> 24) & 0xffu;
+        const uint32 red = premultiplyComponent ((pixel >> 16) & 0xffu, alpha);
+        const uint32 green = premultiplyComponent ((pixel >> 8) & 0xffu, alpha);
+        const uint32 blue = premultiplyComponent (pixel & 0xffu, alpha);
+
+        pixels[i] = (alpha << 24) | (red << 16) | (green << 8) | blue;
+    }
+}
+
+void YUP_CALLTYPE ColorVectorOperations::premultiplyRGBA (uint8* pixels, int numPixels) noexcept
+{
+    auto* pixel = pixels;
+
+    for (int i = 0; i < numPixels; ++i)
+    {
+        const uint32 alpha = pixel[3];
+
+        pixel[0] = static_cast<uint8> (premultiplyComponent (pixel[0], alpha));
+        pixel[1] = static_cast<uint8> (premultiplyComponent (pixel[1], alpha));
+        pixel[2] = static_cast<uint8> (premultiplyComponent (pixel[2], alpha));
+        pixel += 4;
+    }
+}
+
+void YUP_CALLTYPE ColorVectorOperations::convertARGBtoRGBA (const uint32* src, uint32* dst, int numPixels) noexcept
+{
+    for (int i = 0; i < numPixels; ++i)
+    {
+        const uint32 pixel = src[i];
+        dst[i] = ((pixel & 0x00ffffffu) << 8) | ((pixel >> 24) & 0xffu);
+    }
+}
+
+void YUP_CALLTYPE ColorVectorOperations::convertGrayscaleToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept
+{
+    for (int i = 0; i < numPixels; ++i)
+    {
+        const uint32 value = *src++;
+        const auto rgba = packRGBA (value, value, value, 255u);
+        std::memcpy (dst, &rgba, sizeof (rgba));
+        dst += 4;
+    }
+}
+
+void YUP_CALLTYPE ColorVectorOperations::convertRGBToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept
+{
+    for (int i = 0; i < numPixels; ++i)
+    {
+        const auto rgba = packRGBA (src[0], src[1], src[2], 255u);
+        std::memcpy (dst, &rgba, sizeof (rgba));
+        src += 3;
+        dst += 4;
+    }
+}
+
+void YUP_CALLTYPE ColorVectorOperations::lerpRows (const float* rowA, const float* rowB, float* dst, float t, int numPixels) noexcept
+{
+    const auto t4 = Float4::broadcast (t);
+    const auto minusOne = Float4::broadcast (-1.0f);
+    int i = 0;
+
+    for (; i < numPixels; ++i)
+    {
+        const auto a = Float4::loadUnaligned (rowA + i * 4);
+        const auto b = Float4::loadUnaligned (rowB + i * 4);
+        a.mulAdd (b + (a * minusOne), t4).storeUnaligned (dst + i * 4);
+    }
+}
+
+} // namespace yup
diff --git a/modules/yup_simd/buffers/yup_ColorVectorOperations.h b/modules/yup_simd/buffers/yup_ColorVectorOperations.h
new file mode 100644
index 000000000..aabc19982
--- /dev/null
+++ b/modules/yup_simd/buffers/yup_ColorVectorOperations.h
@@ -0,0 +1,53 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+class YUP_API ColorVectorOperations
+{
+public:
+    /** Premultiplies alpha in-place on a row of packed `0xAARRGGBB` pixels. */
+    static void YUP_CALLTYPE premultiplyARGB (uint32* pixels, int numPixels) noexcept;
+
+    /** Premultiplies alpha in-place on a row of RGBA byte pixels. */
+    static void YUP_CALLTYPE premultiplyRGBA (uint8* pixels, int numPixels) noexcept;
+
+    /** Converts packed `0xAARRGGBB` pixels to packed `0xRRGGBBAA` pixels.
+
+        The source and destination buffers may alias for in-place conversion.
+    */
+    static void YUP_CALLTYPE convertARGBtoRGBA (const uint32* src, uint32* dst, int numPixels) noexcept;
+
+    /** Expands 8-bit grayscale pixels to RGBA byte pixels with opaque alpha. */
+    static void YUP_CALLTYPE convertGrayscaleToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept;
+
+    /** Expands RGB byte pixels to RGBA byte pixels with opaque alpha. */
+    static void YUP_CALLTYPE convertRGBToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept;
+
+    /** Blends rows of float RGBA pixels using `dst = rowA + (rowB - rowA) * t`. */
+    static void YUP_CALLTYPE lerpRows (const float* rowA, const float* rowB, float* dst, float t, int numPixels) noexcept;
+};
+
+} // namespace yup
diff --git a/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp
new file mode 100644
index 000000000..b80fcc185
--- /dev/null
+++ b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp
@@ -0,0 +1,172 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace yup
+{
+
+namespace
+{
+void complexMultiply (const float* __restrict a, const float* __restrict b, float* __restrict y, int complexPairs, bool accumulate) noexcept
+{
+    int i = 0;
+
+#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS
+    constexpr int simdWidth = 4;
+    for (; i <= complexPairs - simdWidth; i += simdWidth)
+    {
+        const int idx = i * 2;
+
+        __m256 av = _mm256_loadu_ps (a + idx);
+        __m256 bv = _mm256_loadu_ps (b + idx);
+
+        const __m256 aShuffled = _mm256_permute_ps (av, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m256 bShuffled = _mm256_permute_ps (bv, _MM_SHUFFLE (2, 3, 0, 1));
+
+        __m256 realPart = _mm256_fmsub_ps (av, bv, _mm256_mul_ps (aShuffled, bShuffled));
+        __m256 imagPart = _mm256_fmadd_ps (av, bShuffled, _mm256_mul_ps (aShuffled, bv));
+
+        __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010);
+
+        if (accumulate)
+            interleaved = _mm256_add_ps (_mm256_loadu_ps (y + idx), interleaved);
+
+        _mm256_storeu_ps (y + idx, interleaved);
+    }
+
+#elif YUP_USE_SSE_INTRINSICS
+    constexpr int simdWidth = 2;
+    for (; i <= complexPairs - simdWidth; i += simdWidth)
+    {
+        const int idx = i * 2;
+
+        __m128 av = _mm_loadu_ps (a + idx);
+        __m128 bv = _mm_loadu_ps (b + idx);
+
+        const __m128 aShuffled = _mm_shuffle_ps (av, av, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m128 bShuffled = _mm_shuffle_ps (bv, bv, _MM_SHUFFLE (2, 3, 0, 1));
+
+        __m128 realPart = _mm_sub_ps (_mm_mul_ps (av, bv), _mm_mul_ps (aShuffled, bShuffled));
+        __m128 imagPart = _mm_add_ps (_mm_mul_ps (av, bShuffled), _mm_mul_ps (aShuffled, bv));
+
+        __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart);
+
+        if (accumulate)
+            interleaved = _mm_add_ps (_mm_loadu_ps (y + idx), interleaved);
+
+        _mm_storeu_ps (y + idx, interleaved);
+    }
+
+#elif YUP_USE_ARM_NEON
+    constexpr int simdWidth = 4;
+    for (; i <= complexPairs - simdWidth; i += simdWidth)
+    {
+        const int idx = i * 2;
+
+        float32x4x2_t av = vld2q_f32 (a + idx);
+        float32x4x2_t bv = vld2q_f32 (b + idx);
+
+        float32x4_t real = vmulq_f32 (av.val[0], bv.val[0]);
+        real = vfmsq_f32 (real, av.val[1], bv.val[1]);
+        float32x4_t imag = vmulq_f32 (av.val[0], bv.val[1]);
+        imag = vfmaq_f32 (imag, av.val[1], bv.val[0]);
+
+        if (accumulate)
+        {
+            float32x4x2_t yv = vld2q_f32 (y + idx);
+            real = vaddq_f32 (yv.val[0], real);
+            imag = vaddq_f32 (yv.val[1], imag);
+        }
+
+        float32x4x2_t out = { real, imag };
+        vst2q_f32 (y + idx, out);
+    }
+
+#endif
+
+    for (; i < complexPairs; ++i)
+    {
+        const int realIndex = i * 2;
+        const int imagIndex = realIndex + 1;
+
+        const float ar = a[realIndex];
+        const float ai = a[imagIndex];
+        const float br = b[realIndex];
+        const float bi = b[imagIndex];
+
+        const float real = ar * br - ai * bi;
+        const float imag = ar * bi + ai * br;
+
+        if (accumulate)
+        {
+            y[realIndex] += real;
+            y[imagIndex] += imag;
+        }
+        else
+        {
+            y[realIndex] = real;
+            y[imagIndex] = imag;
+        }
+    }
+}
+} // namespace
+
+//==============================================================================
+void YUP_CALLTYPE ComplexVectorOperations::multiplyAccumulate (const float* a, const float* b, float* y, int complexPairs) noexcept
+{
+    complexMultiply (a, b, y, complexPairs, true);
+}
+
+void YUP_CALLTYPE ComplexVectorOperations::multiply (float* dest, const float* a, const float* b, int complexPairs) noexcept
+{
+    complexMultiply (a, b, dest, complexPairs, false);
+}
+
+void YUP_CALLTYPE ComplexVectorOperations::powerSpectrum (float* dest, const float* src, int complexPairs) noexcept
+{
+    int i = 0;
+
+#if YUP_USE_SSE_INTRINSICS
+    for (; i + 2 <= complexPairs; i += 2)
+    {
+        const __m128 values = _mm_loadu_ps (src + i * 2);
+        const __m128 squared = _mm_mul_ps (values, values);
+        alignas (16) float lanes[4];
+        _mm_store_ps (lanes, squared);
+        dest[i] = lanes[0] + lanes[1];
+        dest[i + 1] = lanes[2] + lanes[3];
+    }
+#elif YUP_USE_ARM_NEON
+    for (; i + 4 <= complexPairs; i += 4)
+    {
+        const float32x4x2_t values = vld2q_f32 (src + i * 2);
+        vst1q_f32 (dest + i, vmlaq_f32 (vmulq_f32 (values.val[0], values.val[0]), values.val[1], values.val[1]));
+    }
+#endif
+
+    for (; i < complexPairs; ++i)
+    {
+        const float real = src[i * 2];
+        const float imag = src[i * 2 + 1];
+        dest[i] = real * real + imag * imag;
+    }
+}
+
+} // namespace yup
diff --git a/modules/yup_simd/buffers/yup_ComplexVectorOperations.h b/modules/yup_simd/buffers/yup_ComplexVectorOperations.h
new file mode 100644
index 000000000..ba780cd71
--- /dev/null
+++ b/modules/yup_simd/buffers/yup_ComplexVectorOperations.h
@@ -0,0 +1,62 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+/** Vector operations for interleaved complex<float> buffers.
+
+    Complex values are stored as `[real, imaginary, real, imaginary, ...]`.
+*/
+class YUP_API ComplexVectorOperations
+{
+public:
+    /** Adds the complex product of `a` and `b` to `y`.
+
+        @param a             Input complex buffer.
+        @param b             Input complex buffer.
+        @param y             Destination complex buffer, accumulated in-place.
+        @param complexPairs  Number of complex pairs, not number of floats.
+    */
+    static void YUP_CALLTYPE multiplyAccumulate (const float* a, const float* b, float* y, int complexPairs) noexcept;
+
+    /** Multiplies two complex buffers and writes the result to `dest`.
+
+        @param dest          Destination complex buffer.
+        @param a             Input complex buffer.
+        @param b             Input complex buffer.
+        @param complexPairs  Number of complex pairs, not number of floats.
+    */
+    static void YUP_CALLTYPE multiply (float* dest, const float* a, const float* b, int complexPairs) noexcept;
+
+    /** Computes the power spectrum of an interleaved complex buffer.
+
+        @param dest          Destination buffer receiving `real * real + imaginary * imaginary`.
+        @param src           Input complex buffer.
+        @param complexPairs  Number of complex pairs, not number of floats.
+    */
+    static void YUP_CALLTYPE powerSpectrum (float* dest, const float* src, int complexPairs) noexcept;
+};
+
+} // namespace yup
diff --git a/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp b/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp
index 72e9718e2..4bf3ed780 100644
--- a/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp
+++ b/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp
@@ -1508,6 +1508,156 @@ double findMaximum (const double* src, Size num) noexcept
 #endif
 }
 
+template <typename Type, typename Size>
+Type sumSquares (const Type* src, Size num) noexcept
+{
+    if (num <= 0)
+        return {};
+
+#if YUP_USE_VDSP_FRAMEWORK
+    Type result = {};
+
+    if constexpr (std::is_same_v<Type, float>)
+        vDSP_svesq (src, 1, &result, (vDSP_Length) num);
+    else
+        vDSP_svesqD (src, 1, &result, (vDSP_Length) num);
+
+    return result;
+#else
+    Type result = {};
+    Size i = 0;
+
+#if YUP_USE_SSE_INTRINSICS || YUP_USE_ARM_NEON
+    using Mode = typename FloatVectorHelpers::ModeType<sizeof (Type)>::Mode;
+
+    typename Mode::ParallelType accumulator = Mode::load1 (Type {});
+
+    for (; i + Mode::numParallel <= num; i += Mode::numParallel)
+    {
+        const auto values = Mode::loadU (src + i);
+        accumulator = Mode::add (accumulator, Mode::mul (values, values));
+    }
+
+    Type lanes[Mode::numParallel];
+    Mode::storeU (lanes, accumulator);
+
+    for (int lane = 0; lane < Mode::numParallel; ++lane)
+        result += lanes[lane];
+#endif
+
+    for (; i < num; ++i)
+        result += src[i] * src[i];
+
+    return result;
+#endif
+}
+
+template <typename Type, typename Size>
+Type dotProduct (const Type* src1, const Type* src2, Size num) noexcept
+{
+    if (num <= 0)
+        return {};
+
+#if YUP_USE_VDSP_FRAMEWORK
+    Type result = {};
+
+    if constexpr (std::is_same_v<Type, float>)
+        vDSP_dotpr (src1, 1, src2, 1, &result, (vDSP_Length) num);
+    else
+        vDSP_dotprD (src1, 1, src2, 1, &result, (vDSP_Length) num);
+
+    return result;
+#else
+    Type result = {};
+    Size i = 0;
+
+#if YUP_USE_SSE_INTRINSICS || YUP_USE_ARM_NEON
+    using Mode = typename FloatVectorHelpers::ModeType<sizeof (Type)>::Mode;
+
+    typename Mode::ParallelType accumulator = Mode::load1 (Type {});
+
+    for (; i + Mode::numParallel <= num; i += Mode::numParallel)
+    {
+        const auto values1 = Mode::loadU (src1 + i);
+        const auto values2 = Mode::loadU (src2 + i);
+        accumulator = Mode::add (accumulator, Mode::mul (values1, values2));
+    }
+
+    Type lanes[Mode::numParallel];
+    Mode::storeU (lanes, accumulator);
+
+    for (int lane = 0; lane < Mode::numParallel; ++lane)
+        result += lanes[lane];
+#endif
+
+    for (; i < num; ++i)
+        result += src1[i] * src2[i];
+
+    return result;
+#endif
+}
+
+template <typename Type, typename Size>
+Type rms (const Type* src, Size num) noexcept
+{
+    return num > 0 ? static_cast<Type> (std::sqrt (sumSquares (src, num) / static_cast<Type> (num))) : Type {};
+}
+
+template <typename Type, typename Size>
+Type maxAbs (const Type* src, Size num) noexcept
+{
+    if (num <= 0)
+        return {};
+
+#if YUP_USE_VDSP_FRAMEWORK
+    Type result = {};
+
+    if constexpr (std::is_same_v<Type, float>)
+        vDSP_maxmgv (src, 1, &result, (vDSP_Length) num);
+    else
+        vDSP_maxmgvD (src, 1, &result, (vDSP_Length) num);
+
+    return result;
+#else
+    Type result = {};
+    Size i = 0;
+
+#if YUP_USE_SSE_INTRINSICS || YUP_USE_ARM_NEON
+    using Mode = typename FloatVectorHelpers::ModeType<sizeof (Type)>::Mode;
+
+    typename Mode::ParallelType accumulator = Mode::load1 (Type {});
+
+    if constexpr (std::is_same_v<Type, float>)
+    {
+        [[maybe_unused]] FloatVectorHelpers::signMask32 signMask;
+        signMask.i = 0x7fffffffUL;
+
+        const auto mask = Mode::load1 (signMask.f);
+
+        for (; i + Mode::numParallel <= num; i += Mode::numParallel)
+            accumulator = Mode::max (accumulator, Mode::bit_and (Mode::loadU (src + i), mask));
+    }
+    else
+    {
+        [[maybe_unused]] FloatVectorHelpers::signMask64 signMask;
+        signMask.i = 0x7fffffffffffffffULL;
+
+        const auto mask = Mode::load1 (signMask.d);
+
+        for (; i + Mode::numParallel <= num; i += Mode::numParallel)
+            accumulator = Mode::max (accumulator, Mode::bit_and (Mode::loadU (src + i), mask));
+    }
+
+    result = Mode::max (accumulator);
+#endif
+
+    for (; i < num; ++i)
+        result = jmax (result, static_cast<Type> (std::abs (src[i])));
+
+    return result;
+#endif
+}
+
 template <typename Size>
 void convertFixedToFloat (float* dest, const int* src, float multiplier, Size num) noexcept
 {
@@ -2044,6 +2194,35 @@ FloatType YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::findMaxi
     return FloatVectorHelpers::findMaximum (src, numValues);
 }
 
+template <typename FloatType, typename CountType>
+FloatType YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::sumSquares (const FloatType* src,
+                                                                                    CountType numValues) noexcept
+{
+    return FloatVectorHelpers::sumSquares (src, numValues);
+}
+
+template <typename FloatType, typename CountType>
+FloatType YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::rms (const FloatType* src,
+                                                                             CountType numValues) noexcept
+{
+    return FloatVectorHelpers::rms (src, numValues);
+}
+
+template <typename FloatType, typename CountType>
+FloatType YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::dotProduct (const FloatType* src1,
+                                                                                    const FloatType* src2,
+                                                                                    CountType numValues) noexcept
+{
+    return FloatVectorHelpers::dotProduct (src1, src2, numValues);
+}
+
+template <typename FloatType, typename CountType>
+FloatType YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::maxAbs (const FloatType* src,
+                                                                                CountType numValues) noexcept
+{
+    return FloatVectorHelpers::maxAbs (src, numValues);
+}
+
 //==============================================================================
 
 template struct FloatVectorOperationsBase<float, int>;
diff --git a/modules/yup_simd/buffers/yup_FloatVectorOperations.h b/modules/yup_simd/buffers/yup_FloatVectorOperations.h
index 37bfe636c..5b9d824ab 100644
--- a/modules/yup_simd/buffers/yup_FloatVectorOperations.h
+++ b/modules/yup_simd/buffers/yup_FloatVectorOperations.h
@@ -173,6 +173,18 @@ struct FloatVectorOperationsBase
 
     /** Finds the maximum value in the given array. */
     static FloatType YUP_CALLTYPE findMaximum (const FloatType* src, CountType numValues) noexcept;
+
+    /** Returns the sum of the squared values in the given array. */
+    static FloatType YUP_CALLTYPE sumSquares (const FloatType* src, CountType numValues) noexcept;
+
+    /** Returns the root mean square of the values in the given array. */
+    static FloatType YUP_CALLTYPE rms (const FloatType* src, CountType numValues) noexcept;
+
+    /** Returns the dot product of two arrays. */
+    static FloatType YUP_CALLTYPE dotProduct (const FloatType* src1, const FloatType* src2, CountType numValues) noexcept;
+
+    /** Returns the maximum absolute value in the given array. */
+    static FloatType YUP_CALLTYPE maxAbs (const FloatType* src, CountType numValues) noexcept;
 };
 
 #if ! DOXYGEN
@@ -201,7 +213,11 @@ struct NameForwarder : public Bases...
         Bases::clip...,
         Bases::findMinAndMax...,
         Bases::findMinimum...,
-        Bases::findMaximum...;
+        Bases::findMaximum...,
+        Bases::sumSquares...,
+        Bases::rms...,
+        Bases::dotProduct...,
+        Bases::maxAbs...;
 };
 
 } // namespace detail
diff --git a/modules/yup_simd/types/yup_EigenAdapters.h b/modules/yup_simd/types/yup_EigenAdapters.h
new file mode 100644
index 000000000..3839ae4b6
--- /dev/null
+++ b/modules/yup_simd/types/yup_EigenAdapters.h
@@ -0,0 +1,43 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+#if YUP_MODULE_AVAILABLE_eigen
+
+namespace yup
+{
+
+inline Eigen::Matrix3f toEigenMatrix (float sx, float shx, float tx, float shy, float sy, float ty) noexcept
+{
+    Eigen::Matrix3f matrix;
+    matrix << sx, shx, tx, shy, sy, ty, 0.0f, 0.0f, 1.0f;
+    return matrix;
+}
+
+inline Eigen::Vector2f toEigenVector (float x, float y) noexcept
+{
+    return Eigen::Vector2f (x, y);
+}
+
+} // namespace yup
+
+#endif
diff --git a/modules/yup_simd/types/yup_SIMDRegister.h b/modules/yup_simd/types/yup_SIMDRegister.h
new file mode 100644
index 000000000..7ff43bd2a
--- /dev/null
+++ b/modules/yup_simd/types/yup_SIMDRegister.h
@@ -0,0 +1,390 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+/** Fixed-width SIMD-style register backed by xsimd batches.
+
+    The lane count is part of the type, which keeps code portable across platforms
+    where the native SIMD width differs.
+
+    @tparam T The element type. Must be supported by xsimd batches.
+    @tparam N The number of elements. Must be greater than 0.
+*/
+template <typename T, int N>
+class SIMDRegister
+{
+    static_assert (N > 0, "SIMDRegister must contain at least one element");
+
+public:
+    //==============================================================================
+    /** The element type of the SIMD register. */
+    using ElementType = T;
+
+    /** The number of elements in the SIMD register. */
+    static constexpr int size = N;
+
+    //==============================================================================
+    /** Default constructor. Initializes all elements to zero. */
+    SIMDRegister() noexcept
+    {
+        for (auto& batch : data)
+            batch = Batch (T {});
+    }
+
+    /** Constructs a SIMD register with all elements set to the given scalar value.
+    
+        @param scalar The value to broadcast to all elements of the register.
+    */
+    explicit SIMDRegister (T scalar) noexcept
+    {
+        for (auto& batch : data)
+            batch = Batch (scalar);
+    }
+
+    /** Constructs a SIMD register by loading elements from a pointer.
+    
+        @param ptr The pointer to the elements to load.
+    */
+    explicit SIMDRegister (const T* ptr) noexcept
+    {
+        load (ptr, false);
+    }
+
+    //==============================================================================
+    /** Loads a SIMD register from a pointer, assuming the pointer is aligned.
+    
+        @param ptr The pointer to the elements to load. Must be aligned to the SIMD width.
+
+        @returns A SIMD register containing the loaded elements.
+    */
+    static forcedinline SIMDRegister loadAligned (const T* ptr) noexcept
+    {
+        SIMDRegister result;
+        result.load (ptr, true);
+        return result;
+    }
+
+    /** Loads a SIMD register from a pointer, assuming the pointer is not aligned.
+    
+        @param ptr The pointer to the elements to load. May be unaligned.
+
+        @returns A SIMD register containing the loaded elements.
+    */
+    static forcedinline SIMDRegister loadUnaligned (const T* ptr) noexcept
+    {
+        SIMDRegister result;
+        result.load (ptr, false);
+        return result;
+    }
+
+    //==============================================================================
+    /** Constructs a SIMD register with all elements set to the given scalar value.
+    
+        @param scalar The value to broadcast to all elements of the register.
+
+        @returns A SIMD register with all elements set to the given scalar value.
+    */
+    static forcedinline SIMDRegister broadcast (T scalar) noexcept
+    {
+        return SIMDRegister (scalar);
+    }
+
+    //==============================================================================
+    /** Constructs a SIMD register with all elements set to zero.
+    
+        @returns A SIMD register with all elements set to zero.
+    */
+    static forcedinline SIMDRegister zero() noexcept
+    {
+        return SIMDRegister();
+    }
+
+    //==============================================================================
+    /** Stores the SIMD register to a pointer, assuming the pointer is aligned.
+    
+        @param ptr The pointer to store the elements. Must be aligned to the SIMD width.
+    */
+    forcedinline void storeAligned (T* ptr) const noexcept
+    {
+        store (ptr, true);
+    }
+
+    /** Stores the SIMD register to a pointer, assuming the pointer is not aligned.
+    
+        @param ptr The pointer to store the elements. May be unaligned.
+    */
+    forcedinline void storeUnaligned (T* ptr) const noexcept
+    {
+        store (ptr, false);
+    }
+
+    //==============================================================================
+    /** Accesses an element of the SIMD register by index.
+    
+        @param i The index of the element to access. Must be in the range [0, N).
+
+        @returns The value of the element at the specified index.
+    */
+    forcedinline T operator[] (int i) const noexcept
+    {
+        jassert (i >= 0 && i < N);
+
+        alignas (64) T lanes[nativeSize];
+        data[static_cast<std::size_t> (i) / nativeSize].store_unaligned (lanes);
+        return lanes[static_cast<std::size_t> (i) % nativeSize];
+    }
+
+    //==============================================================================
+    /** Adds two SIMD registers element-wise.
+    
+        @param other The SIMD register to add.
+
+        @returns A SIMD register containing the element-wise sum.
+    */
+    forcedinline SIMDRegister operator+ (SIMDRegister other) const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = data[i] + other.data[i];
+        return result;
+    }
+
+    /** Subtracts two SIMD registers element-wise.
+    
+        @param other The SIMD register to subtract.
+
+        @returns A SIMD register containing the element-wise difference.
+    */
+    forcedinline SIMDRegister operator- (SIMDRegister other) const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = data[i] - other.data[i];
+        return result;
+    }
+
+    /** Multiplies two SIMD registers element-wise.
+    
+        @param other The SIMD register to multiply.
+
+        @returns A SIMD register containing the element-wise product.
+    */
+    forcedinline SIMDRegister operator* (SIMDRegister other) const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = data[i] * other.data[i];
+        return result;
+    }
+
+    /** Divides two SIMD registers element-wise.
+    
+        @param other The SIMD register to divide by.
+
+        @returns A SIMD register containing the element-wise quotient.
+    */
+    forcedinline SIMDRegister operator/ (SIMDRegister other) const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = data[i] / other.data[i];
+        return result;
+    }
+
+    /** Adds another SIMD register to this one element-wise.
+    
+        @param other The SIMD register to add.
+
+        @returns A reference to this SIMD register after the addition.
+    */
+    forcedinline SIMDRegister& operator+= (SIMDRegister other) noexcept
+    {
+        *this = *this + other;
+        return *this;
+    }
+
+    /** Multiplies another SIMD register with this one element-wise.
+    
+        @param other The SIMD register to multiply.
+
+        @returns A reference to this SIMD register after the multiplication.
+    */
+    forcedinline SIMDRegister& operator*= (SIMDRegister other) noexcept
+    {
+        *this = *this * other;
+        return *this;
+    }
+
+    //==============================================================================
+    /** Performs a fused multiply-add operation: this + (a * b).
+    
+        @param a The first SIMD register to multiply.
+        @param b The second SIMD register to multiply.
+
+        @returns A SIMD register containing the result of the fused multiply-add.
+    */
+    forcedinline SIMDRegister mulAdd (SIMDRegister a, SIMDRegister b) const noexcept
+    {
+        return *this + a * b;
+    }
+
+    //==============================================================================
+    /** Computes the absolute value of each element in the SIMD register.
+    
+        @returns A SIMD register containing the absolute values of the elements.
+    */
+    forcedinline SIMDRegister abs() const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = xsimd::abs (data[i]);
+        return result;
+    }
+
+    //==============================================================================
+    /** Computes the element-wise minimum of two SIMD registers.
+    
+        @param other The SIMD register to compare with.
+
+        @returns A SIMD register containing the element-wise minimum.
+    */
+    forcedinline SIMDRegister min (SIMDRegister other) const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = xsimd::min (data[i], other.data[i]);
+        return result;
+    }
+
+    /** Computes the element-wise maximum of two SIMD registers.
+    
+        @param other The SIMD register to compare with.
+
+        @returns A SIMD register containing the element-wise maximum.
+    */
+    forcedinline SIMDRegister max (SIMDRegister other) const noexcept
+    {
+        SIMDRegister result;
+        for (std::size_t i = 0; i < numBatches; ++i)
+            result.data[i] = xsimd::max (data[i], other.data[i]);
+        return result;
+    }
+
+    //==============================================================================
+    /** Computes the sum of all elements in the SIMD register.
+    
+        @returns The sum of the elements.
+    */
+    forcedinline T sum() const noexcept
+    {
+        T result {};
+        for (int i = 0; i < N; ++i)
+            result += (*this)[i];
+        return result;
+    }
+
+    //==============================================================================
+    /** Computes the maximum value among all elements in the SIMD register.
+    
+        @returns The maximum value among the elements.
+    */
+    forcedinline T hmax() const noexcept
+    {
+        T result = (*this)[0];
+        for (int i = 1; i < N; ++i)
+            result = jmax (result, (*this)[i]);
+        return result;
+    }
+
+private:
+    using Batch = xsimd::batch<T>;
+
+    static constexpr std::size_t nativeSize = Batch::size;
+    static constexpr std::size_t numBatches = (static_cast<std::size_t> (N) + nativeSize - 1) / nativeSize;
+
+    forcedinline void load (const T* ptr, bool aligned) noexcept
+    {
+        std::size_t offset = 0;
+
+        for (std::size_t batchIndex = 0; batchIndex < numBatches; ++batchIndex)
+        {
+            if (offset + nativeSize <= static_cast<std::size_t> (N))
+            {
+                data[batchIndex] = aligned ? Batch::load_aligned (ptr + offset)
+                                           : Batch::load_unaligned (ptr + offset);
+            }
+            else
+            {
+                alignas (64) T lanes[nativeSize] = {};
+                const auto remaining = static_cast<std::size_t> (N) - offset;
+
+                for (std::size_t i = 0; i < remaining; ++i)
+                    lanes[i] = ptr[offset + i];
+
+                data[batchIndex] = Batch::load_unaligned (lanes);
+            }
+
+            offset += nativeSize;
+        }
+    }
+
+    forcedinline void store (T* ptr, bool aligned) const noexcept
+    {
+        std::size_t offset = 0;
+
+        for (std::size_t batchIndex = 0; batchIndex < numBatches; ++batchIndex)
+        {
+            if (offset + nativeSize <= static_cast<std::size_t> (N))
+            {
+                if (aligned)
+                    data[batchIndex].store_aligned (ptr + offset);
+                else
+                    data[batchIndex].store_unaligned (ptr + offset);
+            }
+            else
+            {
+                alignas (64) T lanes[nativeSize];
+                data[batchIndex].store_unaligned (lanes);
+
+                const auto remaining = static_cast<std::size_t> (N) - offset;
+
+                for (std::size_t i = 0; i < remaining; ++i)
+                    ptr[offset + i] = lanes[i];
+            }
+
+            offset += nativeSize;
+        }
+    }
+
+    std::array<Batch, numBatches> data;
+};
+
+using Float4 = SIMDRegister<float, 4>;
+using Float8 = SIMDRegister<float, 8>;
+using Double2 = SIMDRegister<double, 2>;
+using Double4 = SIMDRegister<double, 4>;
+
+} // namespace yup
diff --git a/modules/yup_simd/types/yup_Vec.h b/modules/yup_simd/types/yup_Vec.h
new file mode 100644
index 000000000..6e6157fc8
--- /dev/null
+++ b/modules/yup_simd/types/yup_Vec.h
@@ -0,0 +1,137 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#pragma once
+
+namespace yup
+{
+
+//==============================================================================
+struct alignas (8) Vec2f
+{
+    float x = 0.0f;
+    float y = 0.0f;
+
+    constexpr Vec2f() noexcept = default;
+
+    constexpr Vec2f (float xIn, float yIn) noexcept
+        : x (xIn)
+        , y (yIn)
+    {
+    }
+
+    constexpr Vec2f operator+ (Vec2f other) const noexcept
+    {
+        return { x + other.x, y + other.y };
+    }
+
+    constexpr Vec2f operator* (float scalar) const noexcept
+    {
+        return { x * scalar, y * scalar };
+    }
+
+    constexpr float dot (Vec2f other) const noexcept
+    {
+        return x * other.x + y * other.y;
+    }
+
+    float length() const noexcept
+    {
+        return std::sqrt (dot (*this));
+    }
+
+    Vec2f normalized() const noexcept
+    {
+        const auto len = length();
+        return len > 0.0f ? *this * (1.0f / len) : Vec2f();
+    }
+};
+
+//==============================================================================
+struct alignas (16) Vec4f
+{
+    float r = 0.0f;
+    float g = 0.0f;
+    float b = 0.0f;
+    float a = 0.0f;
+
+    constexpr Vec4f() noexcept = default;
+
+    constexpr Vec4f (float rIn, float gIn, float bIn, float aIn) noexcept
+        : r (rIn)
+        , g (gIn)
+        , b (bIn)
+        , a (aIn)
+    {
+    }
+
+    Vec4f operator+ (Vec4f other) const noexcept
+    {
+        alignas (16) float result[4];
+        (Float4::loadUnaligned (data()) + Float4::loadUnaligned (other.data())).storeUnaligned (result);
+        return load (result);
+    }
+
+    Vec4f operator* (float scalar) const noexcept
+    {
+        alignas (16) float result[4];
+        (Float4::loadUnaligned (data()) * Float4::broadcast (scalar)).storeUnaligned (result);
+        return load (result);
+    }
+
+    Vec4f operator* (Vec4f other) const noexcept
+    {
+        alignas (16) float result[4];
+        (Float4::loadUnaligned (data()) * Float4::loadUnaligned (other.data())).storeUnaligned (result);
+        return load (result);
+    }
+
+    Vec4f lerp (Vec4f other, float t) const noexcept
+    {
+        return *this + (other + (*this * -1.0f)) * t;
+    }
+
+    constexpr Vec4f premultiplied() const noexcept
+    {
+        return { r * a, g * a, b * a, a };
+    }
+
+    static Vec4f load (const float* ptr) noexcept
+    {
+        return { ptr[0], ptr[1], ptr[2], ptr[3] };
+    }
+
+    void store (float* ptr) const noexcept
+    {
+        ptr[0] = r;
+        ptr[1] = g;
+        ptr[2] = b;
+        ptr[3] = a;
+    }
+
+private:
+    const float* data() const noexcept
+    {
+        return std::addressof (r);
+    }
+};
+
+} // namespace yup
diff --git a/modules/yup_simd/yup_simd.cpp b/modules/yup_simd/yup_simd.cpp
index ae49b2ef7..991fdc145 100644
--- a/modules/yup_simd/yup_simd.cpp
+++ b/modules/yup_simd/yup_simd.cpp
@@ -53,3 +53,6 @@
 #endif
 
 #include "buffers/yup_FloatVectorOperations.cpp"
+#include "buffers/yup_ComplexVectorOperations.cpp"
+#include "buffers/yup_AffineTransformOperations.cpp"
+#include "buffers/yup_ColorVectorOperations.cpp"
diff --git a/modules/yup_simd/yup_simd.h b/modules/yup_simd/yup_simd.h
index 9520b154e..0dd8b172d 100644
--- a/modules/yup_simd/yup_simd.h
+++ b/modules/yup_simd/yup_simd.h
@@ -50,7 +50,7 @@
     website:            https://github.com/kunitoki/yup
     license:            ISC
 
-    dependencies:       yup_core
+    dependencies:       yup_core xsimd
     appleFrameworks:    Accelerate
 
   END_YUP_MODULE_DECLARATION
@@ -62,6 +62,7 @@
 #define YUP_SIMD_H_INCLUDED
 
 #include <yup_core/yup_core.h>
+#include <xsimd/xsimd.hpp>
 
 //==============================================================================
 #ifndef YUP_USE_SSE_INTRINSICS
@@ -126,11 +127,30 @@
 #endif
 #endif
 
+//==============================================================================
+#if YUP_MODULE_AVAILABLE_eigen
+#include <eigen/eigen.h>
+#endif
+
 //==============================================================================
 #include <chrono>
+#include <array>
+#include <cmath>
+#include <cstdint>
 #include <type_traits>
 
 //==============================================================================
 YUP_BEGIN_IGNORE_WARNINGS_MSVC (4661)
+
+#include "types/yup_SIMDRegister.h"
+#include "types/yup_Vec.h"
 #include "buffers/yup_FloatVectorOperations.h"
+#include "buffers/yup_ComplexVectorOperations.h"
+#include "buffers/yup_AffineTransformOperations.h"
+#include "buffers/yup_ColorVectorOperations.h"
+
+#if YUP_MODULE_AVAILABLE_eigen
+#include "types/yup_EigenAdapters.h"
+#endif
+
 YUP_END_IGNORE_WARNINGS_MSVC
diff --git a/tests/yup_graphics/yup_Image.cpp b/tests/yup_graphics/yup_Image.cpp
index 958abce36..424c72506 100644
--- a/tests/yup_graphics/yup_Image.cpp
+++ b/tests/yup_graphics/yup_Image.cpp
@@ -74,6 +74,60 @@ TEST (ImageTests, GrayscaleBitmapStoresLuminanceAndReturnsOpaqueARGBColor)
     EXPECT_EQ (image.getPixel (0, 0), 0xff363636u);
 }
 
+TEST (ImageTests, GrayscaleBitmapConvertsToOpaqueRGBATextureBytes)
+{
+    Image image (3, 1, PixelFormat::Grayscale);
+
+    auto raw = image.getRawData();
+    ASSERT_EQ (raw.size(), 3u);
+    raw[0] = 0;
+    raw[1] = 127;
+    raw[2] = 255;
+
+    uint8 textureBytes[12] = {};
+    ColorVectorOperations::convertGrayscaleToRGBA (raw.data(), textureBytes, 3);
+
+    const uint8 expected[] = { 0, 0, 0, 255, 127, 127, 127, 255, 255, 255, 255, 255 };
+
+    for (size_t i = 0; i < std::size (expected); ++i)
+        EXPECT_EQ (textureBytes[i], expected[i]);
+}
+
+TEST (ImageTests, RgbBitmapConvertsToOpaqueRGBATextureBytes)
+{
+    Image image (2, 1, PixelFormat::RGB);
+
+    image.setPixel (0, 0, 0x80123456);
+    image.setPixel (1, 0, 0xffabcdef);
+
+    const auto raw = image.getRawData();
+    uint8 textureBytes[8] = {};
+    ColorVectorOperations::convertRGBToRGBA (raw.data(), textureBytes, 2);
+
+    const uint8 expected[] = { 0x12, 0x34, 0x56, 0xff, 0xab, 0xcd, 0xef, 0xff };
+
+    for (size_t i = 0; i < std::size (expected); ++i)
+        EXPECT_EQ (textureBytes[i], expected[i]);
+}
+
+TEST (ImageTests, RgbaBitmapConvertsToPremultipliedRGBATextureBytes)
+{
+    Image image (2, 1, PixelFormat::RGBA);
+
+    image.setPixel (0, 0, 0x80102040);
+    image.setPixel (1, 0, 0xff010203);
+
+    const auto raw = image.getRawData();
+    uint8 textureBytes[8] = {};
+    std::memcpy (textureBytes, raw.data(), raw.size());
+    ColorVectorOperations::premultiplyRGBA (textureBytes, 2);
+
+    const uint8 expected[] = { 8, 16, 32, 128, 1, 2, 3, 255 };
+
+    for (size_t i = 0; i < std::size (expected); ++i)
+        EXPECT_EQ (textureBytes[i], expected[i]);
+}
+
 TEST (ImageTests, ColorCanConvertToExplicitPackedByteOrders)
 {
     const Color color (0x80123456);
diff --git a/tests/yup_simd.cpp b/tests/yup_simd.cpp
index cac23a400..16a7e4301 100644
--- a/tests/yup_simd.cpp
+++ b/tests/yup_simd.cpp
@@ -20,3 +20,7 @@
 */
 
 #include "yup_simd/yup_FloatVectorOperations.cpp"
+#include "yup_simd/yup_ComplexVectorOperations.cpp"
+#include "yup_simd/yup_AffineTransformOperations.cpp"
+#include "yup_simd/yup_ColorVectorOperations.cpp"
+#include "yup_simd/yup_SIMDRegister.cpp"
diff --git a/tests/yup_simd/yup_AffineTransformOperations.cpp b/tests/yup_simd/yup_AffineTransformOperations.cpp
new file mode 100644
index 000000000..8e234a98a
--- /dev/null
+++ b/tests/yup_simd/yup_AffineTransformOperations.cpp
@@ -0,0 +1,50 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <gtest/gtest.h>
+
+#include <yup_simd/yup_simd.h>
+
+using namespace yup;
+
+TEST (AffineTransformOpsTests, TransformPointsMatchesScalarReference)
+{
+    constexpr int numPoints = 6;
+    const float srcXs[numPoints] = { -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f };
+    const float srcYs[numPoints] = { 3.0f, 2.0f, 1.0f, 0.0f, -1.0f, -2.0f };
+    float dstXs[numPoints] = {};
+    float dstYs[numPoints] = {};
+
+    constexpr float sx = 2.0f;
+    constexpr float shx = -0.5f;
+    constexpr float tx = 4.0f;
+    constexpr float shy = 0.25f;
+    constexpr float sy = -3.0f;
+    constexpr float ty = 1.0f;
+
+    AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, shx, tx, shy, sy, ty);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_NEAR (dstXs[i], sx * srcXs[i] + shx * srcYs[i] + tx, 1.0e-5f);
+        EXPECT_NEAR (dstYs[i], shy * srcXs[i] + sy * srcYs[i] + ty, 1.0e-5f);
+    }
+}
diff --git a/tests/yup_simd/yup_ColorVectorOperations.cpp b/tests/yup_simd/yup_ColorVectorOperations.cpp
new file mode 100644
index 000000000..26e5cc270
--- /dev/null
+++ b/tests/yup_simd/yup_ColorVectorOperations.cpp
@@ -0,0 +1,128 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <gtest/gtest.h>
+
+#include <yup_simd/yup_simd.h>
+
+using namespace yup;
+
+TEST (ColorVectorOpsTests, PremultiplyARGBMatchesScalarReference)
+{
+    uint32_t pixels[] = { 0x80ff8040u, 0xff010203u, 0x00010203u };
+
+    ColorVectorOperations::premultiplyARGB (pixels, 3);
+
+    EXPECT_EQ (pixels[0], 0x80804020u);
+    EXPECT_EQ (pixels[1], 0xff010203u);
+    EXPECT_EQ (pixels[2], 0x00000000u);
+}
+
+TEST (ColorVectorOpsTests, PremultiplyRGBAMatchesScalarReference)
+{
+    uint8 pixels[] = { 255, 128, 64, 128, 1, 2, 3, 255, 1, 2, 3, 0 };
+
+    ColorVectorOperations::premultiplyRGBA (pixels, 3);
+
+    EXPECT_EQ (pixels[0], 128);
+    EXPECT_EQ (pixels[1], 64);
+    EXPECT_EQ (pixels[2], 32);
+    EXPECT_EQ (pixels[3], 128);
+    EXPECT_EQ (pixels[4], 1);
+    EXPECT_EQ (pixels[5], 2);
+    EXPECT_EQ (pixels[6], 3);
+    EXPECT_EQ (pixels[7], 255);
+    EXPECT_EQ (pixels[8], 0);
+    EXPECT_EQ (pixels[9], 0);
+    EXPECT_EQ (pixels[10], 0);
+    EXPECT_EQ (pixels[11], 0);
+}
+
+TEST (ColorVectorOpsTests, ConvertARGBtoRGBA)
+{
+    const uint32_t argb[] = { 0x12345678u, 0xaabbccddu };
+    uint32_t rgba[2] = {};
+
+    ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 2);
+
+    EXPECT_EQ (rgba[0], 0x34567812u);
+    EXPECT_EQ (rgba[1], 0xbbccddaau);
+}
+
+TEST (ColorVectorOpsTests, ConvertARGBtoRGBAInPlace)
+{
+    uint32_t pixels[] = { 0x12345678u, 0xaabbccddu };
+
+    ColorVectorOperations::convertARGBtoRGBA (pixels, pixels, 2);
+
+    EXPECT_EQ (pixels[0], 0x34567812u);
+    EXPECT_EQ (pixels[1], 0xbbccddaau);
+}
+
+TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBA)
+{
+    const uint8 gray[] = { 0, 127, 255 };
+    uint8 rgba[12] = {};
+
+    ColorVectorOperations::convertGrayscaleToRGBA (gray, rgba, 3);
+
+    EXPECT_EQ (rgba[0], 0);
+    EXPECT_EQ (rgba[1], 0);
+    EXPECT_EQ (rgba[2], 0);
+    EXPECT_EQ (rgba[3], 255);
+    EXPECT_EQ (rgba[4], 127);
+    EXPECT_EQ (rgba[5], 127);
+    EXPECT_EQ (rgba[6], 127);
+    EXPECT_EQ (rgba[7], 255);
+    EXPECT_EQ (rgba[8], 255);
+    EXPECT_EQ (rgba[9], 255);
+    EXPECT_EQ (rgba[10], 255);
+    EXPECT_EQ (rgba[11], 255);
+}
+
+TEST (ColorVectorOpsTests, ConvertRGBToRGBA)
+{
+    const uint8 rgb[] = { 1, 2, 3, 4, 5, 6 };
+    uint8 rgba[8] = {};
+
+    ColorVectorOperations::convertRGBToRGBA (rgb, rgba, 2);
+
+    EXPECT_EQ (rgba[0], 1);
+    EXPECT_EQ (rgba[1], 2);
+    EXPECT_EQ (rgba[2], 3);
+    EXPECT_EQ (rgba[3], 255);
+    EXPECT_EQ (rgba[4], 4);
+    EXPECT_EQ (rgba[5], 5);
+    EXPECT_EQ (rgba[6], 6);
+    EXPECT_EQ (rgba[7], 255);
+}
+
+TEST (ColorVectorOpsTests, LerpRowsMatchesScalarReference)
+{
+    const float rowA[] = { 0.0f, 0.25f, 0.5f, 0.75f, 1.0f, 0.5f, 0.0f, 1.0f };
+    const float rowB[] = { 1.0f, 0.75f, 0.5f, 0.25f, 0.0f, 0.25f, 0.5f, 0.75f };
+    float dst[8] = {};
+
+    ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.25f, 2);
+
+    for (int i = 0; i < 8; ++i)
+        EXPECT_NEAR (dst[i], rowA[i] + (rowB[i] - rowA[i]) * 0.25f, 1.0e-5f);
+}
diff --git a/tests/yup_simd/yup_ComplexVectorOperations.cpp b/tests/yup_simd/yup_ComplexVectorOperations.cpp
new file mode 100644
index 000000000..f4c578729
--- /dev/null
+++ b/tests/yup_simd/yup_ComplexVectorOperations.cpp
@@ -0,0 +1,93 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <gtest/gtest.h>
+
+#include <yup_simd/yup_simd.h>
+
+using namespace yup;
+
+class ComplexVectorOperationsTests : public ::testing::Test
+{
+protected:
+    static void multiplyReference (float* dest, const float* a, const float* b, int complexPairs)
+    {
+        for (int i = 0; i < complexPairs; ++i)
+        {
+            const int realIndex = i * 2;
+            const int imagIndex = realIndex + 1;
+
+            dest[realIndex] = a[realIndex] * b[realIndex] - a[imagIndex] * b[imagIndex];
+            dest[imagIndex] = a[realIndex] * b[imagIndex] + a[imagIndex] * b[realIndex];
+        }
+    }
+};
+
+TEST_F (ComplexVectorOperationsTests, MultiplyMatchesScalarReference)
+{
+    constexpr int complexPairs = 7;
+    const float a[complexPairs * 2] = { 1.0f, 2.0f, -3.0f, 4.0f, 5.0f, -6.0f, -7.0f, -8.0f, 0.5f, -0.25f, 2.5f, 3.5f, -4.0f, 1.0f };
+    const float b[complexPairs * 2] = { -2.0f, 0.5f, 1.0f, -1.5f, -0.5f, -2.0f, 3.0f, 2.0f, -4.0f, 0.25f, 1.5f, -2.5f, 0.75f, 2.25f };
+
+    float expected[complexPairs * 2] = {};
+    float actual[complexPairs * 2] = {};
+
+    multiplyReference (expected, a, b, complexPairs);
+    ComplexVectorOperations::multiply (actual, a, b, complexPairs);
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        EXPECT_NEAR (actual[i], expected[i], 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateMatchesScalarReference)
+{
+    constexpr int complexPairs = 5;
+    const float a[complexPairs * 2] = { 1.0f, -1.0f, 2.0f, 3.0f, -4.0f, 0.5f, 0.25f, -0.75f, 3.0f, 2.0f };
+    const float b[complexPairs * 2] = { 0.5f, 2.0f, -1.0f, 0.25f, 3.0f, -2.0f, 1.5f, 4.0f, -0.5f, 1.0f };
+    float expected[complexPairs * 2] = { 0.25f, -0.5f, 0.75f, 1.0f, -1.25f, 1.5f, 1.75f, -2.0f, 2.25f, 2.5f };
+    float actual[complexPairs * 2];
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        actual[i] = expected[i];
+
+    float product[complexPairs * 2] = {};
+    multiplyReference (product, a, b, complexPairs);
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        expected[i] += product[i];
+
+    ComplexVectorOperations::multiplyAccumulate (a, b, actual, complexPairs);
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        EXPECT_NEAR (actual[i], expected[i], 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumMatchesScalarReference)
+{
+    constexpr int complexPairs = 6;
+    const float data[complexPairs * 2] = { 3.0f, 4.0f, 1.0f, -2.0f, -5.0f, 12.0f, 0.5f, 0.25f, -0.75f, 1.25f, 2.0f, -3.0f };
+    float actual[complexPairs] = {};
+
+    ComplexVectorOperations::powerSpectrum (actual, data, complexPairs);
+
+    for (int i = 0; i < complexPairs; ++i)
+        EXPECT_NEAR (actual[i], data[i * 2] * data[i * 2] + data[i * 2 + 1] * data[i * 2 + 1], 1.0e-5f);
+}
diff --git a/tests/yup_simd/yup_FloatVectorOperations.cpp b/tests/yup_simd/yup_FloatVectorOperations.cpp
index cc0807fe8..748a81a8d 100644
--- a/tests/yup_simd/yup_FloatVectorOperations.cpp
+++ b/tests/yup_simd/yup_FloatVectorOperations.cpp
@@ -378,6 +378,55 @@ TEST_F (FloatVectorOperationsTests, ComputeCorrelationMatchesScalar)
     EXPECT_NEAR (correlation, reference, 1.0e-5);
 }
 
+TEST_F (FloatVectorOperationsTests, ReductionsMatchScalarReferences)
+{
+    const float data1[] = { -1.0f, 2.0f, -3.0f, 4.0f, -5.0f };
+    const float data2[] = { 0.5f, -1.0f, 1.5f, -2.0f, 2.5f };
+
+    float expectedSumSquares = 0.0f;
+    float expectedDotProduct = 0.0f;
+    float expectedMaxAbs = 0.0f;
+
+    for (int i = 0; i < 5; ++i)
+    {
+        expectedSumSquares += data1[i] * data1[i];
+        expectedDotProduct += data1[i] * data2[i];
+        expectedMaxAbs = jmax (expectedMaxAbs, std::abs (data1[i]));
+    }
+
+    EXPECT_NEAR (FloatVectorOperations::sumSquares (data1, 5), expectedSumSquares, 1.0e-5f);
+    EXPECT_NEAR (FloatVectorOperations::rms (data1, 5), std::sqrt (expectedSumSquares / 5.0f), 1.0e-5f);
+    EXPECT_NEAR (FloatVectorOperations::dotProduct (data1, data2, 5), expectedDotProduct, 1.0e-5f);
+    EXPECT_NEAR (FloatVectorOperations::maxAbs (data1, 5), expectedMaxAbs, 1.0e-5f);
+
+    EXPECT_FLOAT_EQ (FloatVectorOperations::sumSquares (data1, 0), 0.0f);
+    EXPECT_FLOAT_EQ (FloatVectorOperations::rms (data1, 0), 0.0f);
+    EXPECT_FLOAT_EQ (FloatVectorOperations::dotProduct (data1, data2, 0), 0.0f);
+    EXPECT_FLOAT_EQ (FloatVectorOperations::maxAbs (data1, 0), 0.0f);
+}
+
+TEST_F (FloatVectorOperationsTests, DoubleReductionsMatchScalarReferences)
+{
+    const double data1[] = { -0.25, 0.5, -0.75, 1.0 };
+    const double data2[] = { 2.0, -3.0, 4.0, -5.0 };
+
+    double expectedSumSquares = 0.0;
+    double expectedDotProduct = 0.0;
+    double expectedMaxAbs = 0.0;
+
+    for (int i = 0; i < 4; ++i)
+    {
+        expectedSumSquares += data1[i] * data1[i];
+        expectedDotProduct += data1[i] * data2[i];
+        expectedMaxAbs = jmax (expectedMaxAbs, std::abs (data1[i]));
+    }
+
+    EXPECT_NEAR (FloatVectorOperations::sumSquares (data1, 4), expectedSumSquares, 1.0e-12);
+    EXPECT_NEAR (FloatVectorOperations::rms (data1, 4), std::sqrt (expectedSumSquares / 4.0), 1.0e-12);
+    EXPECT_NEAR (FloatVectorOperations::dotProduct (data1, data2, 4), expectedDotProduct, 1.0e-12);
+    EXPECT_NEAR (FloatVectorOperations::maxAbs (data1, 4), expectedMaxAbs, 1.0e-12);
+}
+
 TEST_F (FloatVectorOperationsTests, FloatToDoubleAndBack)
 {
     Random& random = Random::getSystemRandom();
diff --git a/tests/yup_simd/yup_SIMDRegister.cpp b/tests/yup_simd/yup_SIMDRegister.cpp
new file mode 100644
index 000000000..3eb2b4d60
--- /dev/null
+++ b/tests/yup_simd/yup_SIMDRegister.cpp
@@ -0,0 +1,59 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <gtest/gtest.h>
+
+#include <yup_simd/yup_simd.h>
+
+using namespace yup;
+
+TEST (SIMDRegisterTests, Float4ArithmeticAndHorizontalOps)
+{
+    const float aValues[4] = { 1.0f, -2.0f, 3.0f, -4.0f };
+    const float bValues[4] = { 5.0f, 6.0f, -7.0f, -8.0f };
+
+    const auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    const auto result = a + b * Float4::broadcast (2.0f);
+
+    float stored[4] = {};
+    result.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], aValues[i] + bValues[i] * 2.0f);
+
+    EXPECT_FLOAT_EQ (a.sum(), -2.0f);
+    EXPECT_FLOAT_EQ (a.abs().hmax(), 4.0f);
+}
+
+TEST (SIMDRegisterTests, MulAddAndLoadStoreRoundTrip)
+{
+    alignas (16) const float base[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    alignas (16) const float mul[4] = { -1.0f, 0.5f, 2.0f, -0.25f };
+    alignas (16) const float add[4] = { 10.0f, 20.0f, 30.0f, 40.0f };
+    alignas (16) float stored[4] = {};
+
+    const auto result = Float4::loadAligned (base).mulAdd (Float4::loadAligned (mul), Float4::loadAligned (add));
+    result.storeAligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], base[i] + mul[i] * add[i]);
+}
diff --git a/thirdparty/xsimd/xsimd.h b/thirdparty/xsimd/xsimd.h
new file mode 100644
index 000000000..cfb0d3c16
--- /dev/null
+++ b/thirdparty/xsimd/xsimd.h
@@ -0,0 +1,45 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+/*
+  ==============================================================================
+
+  BEGIN_YUP_MODULE_DECLARATION
+
+    ID:               xsimd
+    vendor:           xtensor-stack
+    version:          14.2.0
+    name:             xsimd C++ library for SIMD intrinsics
+    description:      xsimd is a C++ library for SIMD intrinsics.
+    website:          https://github.com/xtensor-stack/xsimd
+    upstream:         https://github.com/xtensor-stack/xsimd/archive/refs/tags/14.2.0.tar.gz
+    license:          BSD-3-Clause
+
+    searchpaths:      include
+
+  END_YUP_MODULE_DECLARATION
+
+  ==============================================================================
+*/
+
+#pragma once
+
+#include <xsimd/xsimd.hpp>

From 4e04a79aab8d1a60397d65d8c5f16bfa91d2bb4e Mon Sep 17 00:00:00 2001
From: kunitoki <kunitoki@gmail.com>
Date: Fri, 29 May 2026 10:56:19 +0200
Subject: [PATCH 2/5] More tweaks

---
 .../source/examples/SpectrumAnalyzer.h        |  43 +++-
 .../yup_SpectrumAnalyzerComponent.cpp         | 226 +++++++++++++++---
 .../displays/yup_SpectrumAnalyzerComponent.h  |  42 +++-
 .../yup_SpectrumAnalyzerComponent.cpp         |  51 ++++
 4 files changed, 321 insertions(+), 41 deletions(-)

diff --git a/examples/graphics/source/examples/SpectrumAnalyzer.h b/examples/graphics/source/examples/SpectrumAnalyzer.h
index 0d534883c..56932c585 100644
--- a/examples/graphics/source/examples/SpectrumAnalyzer.h
+++ b/examples/graphics/source/examples/SpectrumAnalyzer.h
@@ -715,6 +715,19 @@ class SpectrumAnalyzerDemo
         };
         addAndMakeVisible (*displayTypeCombo);
 
+        // Level mode selector
+        levelModeCombo = std::make_unique<yup::ComboBox> ("LevelMode");
+        levelModeCombo->addItem ("Peak dBFS", 1);
+        levelModeCombo->addItem ("RMS dBFS", 2);
+        levelModeCombo->addItem ("Power dBFS", 3);
+        levelModeCombo->addItem ("PSD dBFS/Hz", 4);
+        levelModeCombo->setSelectedId (1);
+        levelModeCombo->onSelectedItemChanged = [this]
+        {
+            updateLevelMode();
+        };
+        addAndMakeVisible (*levelModeCombo);
+
         // Release control
         releaseSlider = std::make_unique<yup::Slider> (yup::Slider::LinearHorizontal, "Release");
         releaseSlider->setRange ({ 0.0, 5.0 });
@@ -815,7 +828,7 @@ class SpectrumAnalyzerDemo
         // Create parameter labels with proper font sizing
         auto labelFont = font.withHeight (12.0f);
 
-        for (const auto& labelText : { "Signal Type:", "Frequency:", "Amplitude:", "Sweep Duration:", "FFT Size:", "Window:", "Display:", "View Mode:", "Color Map:", "Release:", "Overlap:", "Smoothing:" })
+        for (const auto& labelText : { "Signal Type:", "Frequency:", "Amplitude:", "Sweep Duration:", "FFT Size:", "Window:", "Display:", "View Mode:", "Color Map:", "Release:", "Overlap:", "Smoothing:", "Level Mode:" })
         {
             auto label = parameterLabels.add (std::make_unique<yup::Label> (labelText));
             label->setText (labelText);
@@ -898,6 +911,7 @@ class SpectrumAnalyzerDemo
         auto row3 = bounds.removeFromTop (rowHeight);
         auto releaseSection = row3.removeFromLeft (colWidth);
         auto overlapSection = row3.removeFromLeft (colWidth);
+        auto levelModeSection = row3.removeFromLeft (colWidth);
 
         parameterLabels[9]->setBounds (releaseSection.removeFromTop (labelHeight));
         releaseSlider->setBounds (releaseSection.removeFromTop (controlHeight));
@@ -905,6 +919,9 @@ class SpectrumAnalyzerDemo
         parameterLabels[10]->setBounds (overlapSection.removeFromTop (labelHeight));
         overlapSlider->setBounds (overlapSection.removeFromTop (controlHeight));
 
+        parameterLabels[12]->setBounds (levelModeSection.removeFromTop (labelHeight));
+        levelModeCombo->setBounds (levelModeSection.removeFromTop (controlHeight));
+
         // Fourth row: Status labels
         auto row4 = bounds.removeFromTop (30);
         auto freqStatus = row4.removeFromLeft (bounds.getWidth() / 3);
@@ -1038,6 +1055,29 @@ class SpectrumAnalyzerDemo
         analyzerComponent.setDisplayType (displayType);
     }
 
+    void updateLevelMode()
+    {
+        auto levelMode = yup::SpectrumAnalyzerComponent::LevelMode::peakDecibels;
+
+        switch (levelModeCombo->getSelectedId())
+        {
+            case 1:
+                levelMode = yup::SpectrumAnalyzerComponent::LevelMode::peakDecibels;
+                break;
+            case 2:
+                levelMode = yup::SpectrumAnalyzerComponent::LevelMode::rmsDecibels;
+                break;
+            case 3:
+                levelMode = yup::SpectrumAnalyzerComponent::LevelMode::powerDecibels;
+                break;
+            case 4:
+                levelMode = yup::SpectrumAnalyzerComponent::LevelMode::powerSpectralDensity;
+                break;
+        }
+
+        analyzerComponent.setLevelMode (levelMode);
+    }
+
     void updateViewMode()
     {
         switch (viewModeCombo->getSelectedId())
@@ -1114,6 +1154,7 @@ class SpectrumAnalyzerDemo
     std::unique_ptr<yup::ComboBox> fftSizeCombo;
     std::unique_ptr<yup::ComboBox> windowTypeCombo;
     std::unique_ptr<yup::ComboBox> displayTypeCombo;
+    std::unique_ptr<yup::ComboBox> levelModeCombo;
     std::unique_ptr<yup::Slider> releaseSlider;
     std::unique_ptr<yup::Slider> overlapSlider;
     std::unique_ptr<yup::Slider> smoothingSlider;
diff --git a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp
index 8f2dc3d33..abcfbdbaa 100644
--- a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp
+++ b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp
@@ -101,14 +101,14 @@ void SpectrumAnalyzerComponent::processFFT()
     // Perform FFT
     fftProcessor->performRealFFTForward (fftInputBuffer.data(), fftOutputBuffer.data());
 
-    // Pre-compute magnitudes with window gain compensation
+    // Pre-compute raw FFT magnitudes. Calibration is applied when bins are mapped to display levels.
     const int numBins = fftSize / 2 + 1;
 
     for (int binIndex = 0; binIndex < numBins; ++binIndex)
     {
         const float real = fftOutputBuffer[static_cast<size_t> (binIndex * 2)];
         const float imag = fftOutputBuffer[static_cast<size_t> (binIndex * 2 + 1)];
-        const float magnitude = std::sqrt (real * real + imag * imag) * windowGain;
+        const float magnitude = std::sqrt (real * real + imag * imag);
 
         magnitudeBuffer[static_cast<size_t> (binIndex)] = magnitude;
     }
@@ -117,8 +117,6 @@ void SpectrumAnalyzerComponent::processFFT()
 void SpectrumAnalyzerComponent::updateDisplay (bool hasNewFFTData)
 {
     // Always apply consistent smoothing to prevent pulsating
-    const int numBins = fftSize / 2 + 1;
-
     // Process display bins
     for (int i = 0; i < scopeSize; ++i)
     {
@@ -162,36 +160,8 @@ void SpectrumAnalyzerComponent::updateDisplay (bool hasNewFFTData)
             const float endBin = (freqRangeEnd * float (fftSize)) / float (sampleRate);
             const float binSpan = endBin - startBin;
 
-            float magnitude = 0.0f;
-
-            if (binSpan <= 1.5f)
-            {
-                // Low frequencies: Use interpolation for smooth transitions
-                const float exactBin = (centerFreq * float (fftSize)) / float (sampleRate);
-                const int bin1 = jlimit (0, numBins - 1, static_cast<int> (exactBin));
-                const int bin2 = jlimit (0, numBins - 1, bin1 + 1);
-                const float fraction = exactBin - float (bin1);
-
-                const float mag1 = magnitudeBuffer[static_cast<size_t> (bin1)];
-                const float mag2 = magnitudeBuffer[static_cast<size_t> (bin2)];
-
-                // Linear interpolation for smooth low-frequency response
-                magnitude = mag1 + fraction * (mag2 - mag1);
-            }
-            else
-            {
-                // High frequencies: Aggregate multiple bins using peak-hold
-                const int binStart = jlimit (0, numBins - 1, static_cast<int> (startBin));
-                const int binEnd = jlimit (0, numBins - 1, static_cast<int> (endBin + 0.5f));
-
-                for (int binIndex = binStart; binIndex <= binEnd; ++binIndex)
-                    magnitude = jmax (magnitude, magnitudeBuffer[static_cast<size_t> (binIndex)]);
-            }
-
-            // Convert to decibels with proper calibration
-            const float magnitudeDb = magnitude > 0.0f
-                                        ? 20.0f * std::log10 (magnitude / float (fftSize))
-                                        : minDecibels;
+            const float exactBin = (centerFreq * float (fftSize)) / float (sampleRate);
+            const float magnitudeDb = getDisplayDecibelsForBinRange (startBin, endBin, exactBin);
 
             // Map to display range [0.0, 1.0]
             targetLevel = jmap (jlimit (minDecibels, maxDecibels, magnitudeDb), minDecibels, maxDecibels, 0.0f, 1.0f);
@@ -243,13 +213,175 @@ void SpectrumAnalyzerComponent::generateWindow()
 {
     WindowFunctions<float>::generate (currentWindowType, windowBuffer.data(), windowBuffer.size());
 
-    // Calculate window gain compensation
     float windowSum = 0.0f;
+    float windowPowerSum = 0.0f;
     for (int i = 0; i < fftSize; ++i)
-        windowSum += windowBuffer[static_cast<size_t> (i)];
+    {
+        const float windowValue = windowBuffer[static_cast<size_t> (i)];
+        windowSum += windowValue;
+        windowPowerSum += windowValue * windowValue;
+    }
+
+    windowCoherentGain = windowSum > 0.0f ? windowSum / float (fftSize) : 1.0f;
+
+    equivalentNoiseBandwidthBins = (windowSum > 0.0f && windowPowerSum > 0.0f)
+                                     ? (float (fftSize) * windowPowerSum) / (windowSum * windowSum)
+                                     : 1.0f;
+}
+
+float SpectrumAnalyzerComponent::getBinPeakAmplitude (int binIndex) const noexcept
+{
+    const int lastBin = fftSize / 2;
+
+    if (! isPositiveAndBelow (binIndex, lastBin + 1))
+        return 0.0f;
+
+    const float coherentGain = windowCoherentGain > 0.0f ? windowCoherentGain : 1.0f;
+    const float oneSidedScale = (binIndex > 0 && binIndex < lastBin) ? 2.0f : 1.0f;
+    const float rawMagnitude = magnitudeBuffer[static_cast<size_t> (binIndex)];
+
+    return (oneSidedScale * rawMagnitude) / (coherentGain * float (fftSize));
+}
+
+float SpectrumAnalyzerComponent::getBinRMSAmplitude (int binIndex) const noexcept
+{
+    const int lastBin = fftSize / 2;
+    const float peakAmplitude = getBinPeakAmplitude (binIndex);
+
+    if (binIndex <= 0 || binIndex >= lastBin)
+        return peakAmplitude;
+
+    return peakAmplitude * 0.7071067811865475f;
+}
+
+float SpectrumAnalyzerComponent::getBinPower (int binIndex) const noexcept
+{
+    const float rmsAmplitude = getBinRMSAmplitude (binIndex);
+    return rmsAmplitude * rmsAmplitude;
+}
+
+float SpectrumAnalyzerComponent::getBinPowerSpectralDensity (int binIndex) const noexcept
+{
+    const float enbwHz = getEquivalentNoiseBandwidthHz();
+    return enbwHz > 0.0f ? getBinPower (binIndex) / enbwHz : 0.0f;
+}
+
+float SpectrumAnalyzerComponent::getBinLinearLevel (int binIndex) const noexcept
+{
+    switch (levelMode)
+    {
+        case LevelMode::peakDecibels:
+            return getBinPeakAmplitude (binIndex);
+
+        case LevelMode::rmsDecibels:
+            return getBinRMSAmplitude (binIndex);
+
+        case LevelMode::powerDecibels:
+            return getBinPower (binIndex);
+
+        case LevelMode::powerSpectralDensity:
+            return getBinPowerSpectralDensity (binIndex);
+    }
+
+    return getBinPeakAmplitude (binIndex);
+}
+
+float SpectrumAnalyzerComponent::linearLevelToDecibels (float level) const noexcept
+{
+    if (level <= 0.0f)
+        return minDecibels;
+
+    return (isPowerMode() ? 10.0f : 20.0f) * std::log10 (level);
+}
+
+float SpectrumAnalyzerComponent::getInterpolatedPeakDecibels (float exactBin) const noexcept
+{
+    const int numBins = fftSize / 2 + 1;
+    const int lastBin = numBins - 1;
+    const int nearestBin = jlimit (0, lastBin, roundToInt (exactBin));
+
+    int peakBin = nearestBin;
+    float peakLevel = getBinLinearLevel (nearestBin);
+
+    const int searchStart = jmax (0, nearestBin - 1);
+    const int searchEnd = jmin (lastBin, nearestBin + 1);
+
+    for (int binIndex = searchStart; binIndex <= searchEnd; ++binIndex)
+    {
+        const float binLevel = getBinLinearLevel (binIndex);
+
+        if (binLevel > peakLevel)
+        {
+            peakLevel = binLevel;
+            peakBin = binIndex;
+        }
+    }
+
+    const float peakDecibels = linearLevelToDecibels (peakLevel);
+
+    if (peakBin <= 0 || peakBin >= lastBin)
+        return peakDecibels;
+
+    const float y0 = linearLevelToDecibels (getBinLinearLevel (peakBin - 1));
+    const float y1 = peakDecibels;
+    const float y2 = linearLevelToDecibels (getBinLinearLevel (peakBin + 1));
+    const float denominator = y0 - 2.0f * y1 + y2;
+
+    if (std::abs (denominator) < 1.0e-6f || denominator >= 0.0f)
+        return y1;
 
-    // Gain compensation factor to restore energy after windowing
-    windowGain = windowSum > 0.0f ? float (fftSize) / windowSum : 1.0f;
+    const float offset = jlimit (-1.0f, 1.0f, 0.5f * (y0 - y2) / denominator);
+    return y1 - 0.25f * (y0 - y2) * offset;
+}
+
+float SpectrumAnalyzerComponent::getDisplayDecibelsForBinRange (float startBin, float endBin, float centerBin) const noexcept
+{
+    const int numBins = fftSize / 2 + 1;
+    const int lastBin = numBins - 1;
+    const float binSpan = endBin - startBin;
+
+    if (binSpan <= 1.5f && ! isPowerMode())
+        return getInterpolatedPeakDecibels (centerBin);
+
+    const int binStart = jlimit (0, lastBin, static_cast<int> (std::floor (startBin)));
+    const int binEnd = jlimit (0, lastBin, static_cast<int> (std::ceil (endBin)));
+
+    if (levelMode == LevelMode::powerDecibels)
+    {
+        float bandPower = 0.0f;
+
+        for (int binIndex = binStart; binIndex <= binEnd; ++binIndex)
+            bandPower += getBinPower (binIndex);
+
+        return linearLevelToDecibels (bandPower);
+    }
+
+    if (levelMode == LevelMode::powerSpectralDensity)
+    {
+        float densitySum = 0.0f;
+        int densityCount = 0;
+
+        for (int binIndex = binStart; binIndex <= binEnd; ++binIndex)
+        {
+            densitySum += getBinPowerSpectralDensity (binIndex);
+            ++densityCount;
+        }
+
+        return linearLevelToDecibels (densityCount > 0 ? densitySum / float (densityCount) : 0.0f);
+    }
+
+    float peakLevel = 0.0f;
+
+    for (int binIndex = binStart; binIndex <= binEnd; ++binIndex)
+        peakLevel = jmax (peakLevel, getBinLinearLevel (binIndex));
+
+    return linearLevelToDecibels (peakLevel);
+}
+
+bool SpectrumAnalyzerComponent::isPowerMode() const noexcept
+{
+    return levelMode == LevelMode::powerDecibels
+        || levelMode == LevelMode::powerSpectralDensity;
 }
 
 //==============================================================================
@@ -485,8 +617,10 @@ void SpectrumAnalyzerComponent::setWindowType (WindowType type)
     if (currentWindowType != type)
     {
         currentWindowType = type;
+        generateWindow();
+        needsWindowUpdate = false;
 
-        needsWindowUpdate = true;
+        repaint();
     }
 }
 
@@ -551,6 +685,20 @@ void SpectrumAnalyzerComponent::setDisplayType (DisplayType type)
     }
 }
 
+void SpectrumAnalyzerComponent::setLevelMode (LevelMode mode)
+{
+    if (levelMode != mode)
+    {
+        levelMode = mode;
+        repaint();
+    }
+}
+
+float SpectrumAnalyzerComponent::getEquivalentNoiseBandwidthHz() const noexcept
+{
+    return equivalentNoiseBandwidthBins * (float (sampleRate) / float (fftSize));
+}
+
 //==============================================================================
 float SpectrumAnalyzerComponent::getFrequencyForBin (int binIndex) const noexcept
 {
diff --git a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h
index f2a34e88f..da5e94925 100644
--- a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h
+++ b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h
@@ -64,6 +64,15 @@ class YUP_API SpectrumAnalyzerComponent
         filled ///< Draw spectrum as smooth filled area
     };
 
+    /** Level calculation mode for spectrum magnitudes. */
+    enum class LevelMode
+    {
+        peakDecibels,        ///< Peak amplitude in dBFS, where a full-scale sine peak is 0 dBFS
+        rmsDecibels,         ///< RMS amplitude in dBFS, where a full-scale sine is approximately -3.01 dBFS
+        powerDecibels,       ///< RMS power in dBFS, summed across each displayed frequency band
+        powerSpectralDensity ///< RMS power spectral density in dBFS/Hz, corrected for window ENBW
+    };
+
     //==============================================================================
     /** Display constants */
     enum
@@ -159,6 +168,25 @@ class YUP_API SpectrumAnalyzerComponent
     /** Returns the current display type. */
     DisplayType getDisplayType() const noexcept { return displayType; }
 
+    //==============================================================================
+    /** Sets the level calculation mode.
+
+        @param mode    the level mode to use when converting FFT bins to decibels
+    */
+    void setLevelMode (LevelMode mode);
+
+    /** Returns the current level calculation mode. */
+    LevelMode getLevelMode() const noexcept { return levelMode; }
+
+    /** Returns the coherent gain of the current window. */
+    float getWindowCoherentGain() const noexcept { return windowCoherentGain; }
+
+    /** Returns the equivalent noise bandwidth of the current window in FFT bins. */
+    float getEquivalentNoiseBandwidthBins() const noexcept { return equivalentNoiseBandwidthBins; }
+
+    /** Returns the equivalent noise bandwidth of the current window in Hz. */
+    float getEquivalentNoiseBandwidthHz() const noexcept;
+
     //==============================================================================
     /** Sets the release time for spectrum falloff.
 
@@ -214,6 +242,16 @@ class YUP_API SpectrumAnalyzerComponent
     void drawFrequencyGrid (Graphics& g, const Rectangle<float>& bounds);
     void drawDecibelGrid (Graphics& g, const Rectangle<float>& bounds);
 
+    float getBinPeakAmplitude (int binIndex) const noexcept;
+    float getBinRMSAmplitude (int binIndex) const noexcept;
+    float getBinPower (int binIndex) const noexcept;
+    float getBinPowerSpectralDensity (int binIndex) const noexcept;
+    float getBinLinearLevel (int binIndex) const noexcept;
+    float linearLevelToDecibels (float level) const noexcept;
+    float getInterpolatedPeakDecibels (float exactBin) const noexcept;
+    float getDisplayDecibelsForBinRange (float startBin, float endBin, float centerBin) const noexcept;
+    bool isPowerMode() const noexcept;
+
     float frequencyToX (float frequency, const Rectangle<float>& bounds) const noexcept;
     float decibelToY (float decibel, const Rectangle<float>& bounds) const noexcept;
     float binToY (int binIndex, float height) const noexcept;
@@ -235,6 +273,7 @@ class YUP_API SpectrumAnalyzerComponent
     // Configuration
     WindowType currentWindowType = WindowType::hann;
     DisplayType displayType = DisplayType::filled;
+    LevelMode levelMode = LevelMode::peakDecibels;
     int fftSize = 4096;
     float minFrequency = 20.0f;
     float maxFrequency = 20000.0f;
@@ -246,7 +285,8 @@ class YUP_API SpectrumAnalyzerComponent
     float releaseTimeSeconds = 1.0f;
 
     // Window compensation
-    float windowGain = 1.0f;
+    float windowCoherentGain = 1.0f;
+    float equivalentNoiseBandwidthBins = 1.0f;
     bool needsWindowUpdate = true;
 
     YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (SpectrumAnalyzerComponent)
diff --git a/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp b/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp
index f057952ee..7bf80d369 100644
--- a/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp
+++ b/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp
@@ -163,6 +163,57 @@ TEST_F (SpectrumAnalyzerComponentTests, ToggleWindowTypes)
     EXPECT_EQ (WindowType::hann, analyzer->getWindowType());
 }
 
+TEST_F (SpectrumAnalyzerComponentTests, RectangularWindowHasUnityCalibration)
+{
+    analyzer->setWindowType (WindowType::rectangular);
+
+    EXPECT_FLOAT_EQ (1.0f, analyzer->getWindowCoherentGain());
+    EXPECT_FLOAT_EQ (1.0f, analyzer->getEquivalentNoiseBandwidthBins());
+}
+
+TEST_F (SpectrumAnalyzerComponentTests, HannWindowReportsExpectedNoiseBandwidth)
+{
+    analyzer->setWindowType (WindowType::hann);
+
+    EXPECT_NEAR (0.5f, analyzer->getWindowCoherentGain(), 0.001f);
+    EXPECT_NEAR (1.5f, analyzer->getEquivalentNoiseBandwidthBins(), 0.01f);
+}
+
+TEST_F (SpectrumAnalyzerComponentTests, EquivalentNoiseBandwidthHzFollowsSampleRate)
+{
+    analyzer->setWindowType (WindowType::rectangular);
+    analyzer->setSampleRate (48000.0);
+
+    EXPECT_NEAR (48000.0f / 2048.0f, analyzer->getEquivalentNoiseBandwidthHz(), 0.001f);
+}
+
+//==============================================================================
+// Level Mode Tests
+//==============================================================================
+
+TEST_F (SpectrumAnalyzerComponentTests, DefaultLevelModeIsPeakDecibels)
+{
+    EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::peakDecibels, analyzer->getLevelMode());
+}
+
+TEST_F (SpectrumAnalyzerComponentTests, SetLevelModeToRMSDecibels)
+{
+    analyzer->setLevelMode (SpectrumAnalyzerComponent::LevelMode::rmsDecibels);
+    EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::rmsDecibels, analyzer->getLevelMode());
+}
+
+TEST_F (SpectrumAnalyzerComponentTests, SetLevelModeToPowerDecibels)
+{
+    analyzer->setLevelMode (SpectrumAnalyzerComponent::LevelMode::powerDecibels);
+    EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::powerDecibels, analyzer->getLevelMode());
+}
+
+TEST_F (SpectrumAnalyzerComponentTests, SetLevelModeToPowerSpectralDensity)
+{
+    analyzer->setLevelMode (SpectrumAnalyzerComponent::LevelMode::powerSpectralDensity);
+    EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::powerSpectralDensity, analyzer->getLevelMode());
+}
+
 //==============================================================================
 // Update Rate Tests
 //==============================================================================

From 271eb3d27ab4d4100ee0974b2a854191c3682ad6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 29 May 2026 22:09:23 +0000
Subject: [PATCH 3/5] Add comprehensive yup_simd unit tests for all classes

---
 tests/CMakeLists.txt                          |   1 +
 tests/yup_simd.cpp                            |   1 +
 .../yup_AffineTransformOperations.cpp         | 178 +++++++-
 tests/yup_simd/yup_ColorVectorOperations.cpp  | 229 ++++++++++
 .../yup_simd/yup_ComplexVectorOperations.cpp  | 198 +++++++++
 tests/yup_simd/yup_SIMDRegister.cpp           | 391 ++++++++++++++++++
 tests/yup_simd/yup_Vec.cpp                    | 358 ++++++++++++++++
 7 files changed, 1355 insertions(+), 1 deletion(-)
 create mode 100644 tests/yup_simd/yup_Vec.cpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2faad6c3b..160886428 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -55,6 +55,7 @@ set (target_gtest_modules "")
 set (target_preloaded "")
 set (target_modules
     yup_core
+    yup_simd
     yup_audio_basics
     yup_audio_devices
     yup_audio_formats
diff --git a/tests/yup_simd.cpp b/tests/yup_simd.cpp
index 16a7e4301..aa74f453e 100644
--- a/tests/yup_simd.cpp
+++ b/tests/yup_simd.cpp
@@ -24,3 +24,4 @@
 #include "yup_simd/yup_AffineTransformOperations.cpp"
 #include "yup_simd/yup_ColorVectorOperations.cpp"
 #include "yup_simd/yup_SIMDRegister.cpp"
+#include "yup_simd/yup_Vec.cpp"
diff --git a/tests/yup_simd/yup_AffineTransformOperations.cpp b/tests/yup_simd/yup_AffineTransformOperations.cpp
index 8e234a98a..1a22a5f3f 100644
--- a/tests/yup_simd/yup_AffineTransformOperations.cpp
+++ b/tests/yup_simd/yup_AffineTransformOperations.cpp
@@ -25,7 +25,24 @@
 
 using namespace yup;
 
-TEST (AffineTransformOpsTests, TransformPointsMatchesScalarReference)
+class AffineTransformOpsTests : public ::testing::Test
+{
+protected:
+    static void transformScalar (const float* srcXs, const float* srcYs,
+                                  float* dstXs, float* dstYs, int numPoints,
+                                  float sx, float shx, float tx, float shy, float sy, float ty) noexcept
+    {
+        for (int i = 0; i < numPoints; ++i)
+        {
+            const float x = srcXs[i];
+            const float y = srcYs[i];
+            dstXs[i] = sx * x + shx * y + tx;
+            dstYs[i] = shy * x + sy * y + ty;
+        }
+    }
+};
+
+TEST_F (AffineTransformOpsTests, TransformPointsMatchesScalarReference)
 {
     constexpr int numPoints = 6;
     const float srcXs[numPoints] = { -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f };
@@ -48,3 +65,162 @@ TEST (AffineTransformOpsTests, TransformPointsMatchesScalarReference)
         EXPECT_NEAR (dstYs[i], shy * srcXs[i] + sy * srcYs[i] + ty, 1.0e-5f);
     }
 }
+
+TEST_F (AffineTransformOpsTests, TransformPointsInPlace)
+{
+    constexpr int numPoints = 5;
+    float xs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    float ys[numPoints] = { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+
+    const float origXs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    const float origYs[numPoints] = { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+
+    constexpr float sx = 2.0f, shx = 0.5f, tx = 1.0f;
+    constexpr float shy = -0.5f, sy = 3.0f, ty = -1.0f;
+
+    AffineTransformOperations::transformPoints (xs, ys, numPoints, sx, shx, tx, shy, sy, ty);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_NEAR (xs[i], sx * origXs[i] + shx * origYs[i] + tx, 1.0e-5f);
+        EXPECT_NEAR (ys[i], shy * origXs[i] + sy * origYs[i] + ty, 1.0e-5f);
+    }
+}
+
+TEST_F (AffineTransformOpsTests, IdentityTransformLeavesPointsUnchanged)
+{
+    constexpr int numPoints = 8;
+    const float srcXs[numPoints] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f };
+    const float srcYs[numPoints] = { 8.0f, -7.0f, 6.0f, -5.0f, 4.0f, -3.0f, 2.0f, -1.0f };
+    float dstXs[numPoints] = {};
+    float dstYs[numPoints] = {};
+
+    // Identity: sx=1, shx=0, tx=0, shy=0, sy=1, ty=0
+    AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_FLOAT_EQ (dstXs[i], srcXs[i]);
+        EXPECT_FLOAT_EQ (dstYs[i], srcYs[i]);
+    }
+}
+
+TEST_F (AffineTransformOpsTests, PureTranslation)
+{
+    constexpr int numPoints = 6;
+    const float srcXs[numPoints] = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    const float srcYs[numPoints] = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    float dstXs[numPoints] = {};
+    float dstYs[numPoints] = {};
+
+    constexpr float tx = 100.0f, ty = 200.0f;
+
+    // Pure translation: sx=1, shx=0, shy=0, sy=1
+    AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, 1.0f, 0.0f, tx, 0.0f, 1.0f, ty);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_FLOAT_EQ (dstXs[i], srcXs[i] + tx);
+        EXPECT_FLOAT_EQ (dstYs[i], srcYs[i] + ty);
+    }
+}
+
+TEST_F (AffineTransformOpsTests, PureScale)
+{
+    constexpr int numPoints = 8;
+    const float srcXs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
+    const float srcYs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
+    float dstXs[numPoints] = {};
+    float dstYs[numPoints] = {};
+
+    constexpr float sx = 2.0f, sy = -0.5f;
+
+    AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, 0.0f, 0.0f, 0.0f, sy, 0.0f);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_FLOAT_EQ (dstXs[i], sx * srcXs[i]);
+        EXPECT_FLOAT_EQ (dstYs[i], sy * srcYs[i]);
+    }
+}
+
+TEST_F (AffineTransformOpsTests, SinglePointTransform)
+{
+    const float srcX = 3.0f;
+    const float srcY = 4.0f;
+    float dstX = 0.0f;
+    float dstY = 0.0f;
+
+    constexpr float sx = 2.0f, shx = 1.0f, tx = -1.0f;
+    constexpr float shy = 0.5f, sy = -1.0f, ty = 3.0f;
+
+    AffineTransformOperations::transformPoints (&srcX, &srcY, &dstX, &dstY, 1, sx, shx, tx, shy, sy, ty);
+
+    EXPECT_NEAR (dstX, sx * srcX + shx * srcY + tx, 1.0e-5f);
+    EXPECT_NEAR (dstY, shy * srcX + sy * srcY + ty, 1.0e-5f);
+}
+
+TEST_F (AffineTransformOpsTests, ZeroPointsDoesNothing)
+{
+    float dstXs[4] = { 99.0f, 99.0f, 99.0f, 99.0f };
+    float dstYs[4] = { 99.0f, 99.0f, 99.0f, 99.0f };
+
+    // numPoints=0 should not write to output
+    AffineTransformOperations::transformPoints (nullptr, nullptr, dstXs, dstYs, 0, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f);
+
+    for (int i = 0; i < 4; ++i)
+    {
+        EXPECT_FLOAT_EQ (dstXs[i], 99.0f);
+        EXPECT_FLOAT_EQ (dstYs[i], 99.0f);
+    }
+}
+
+TEST_F (AffineTransformOpsTests, LargeBufferMatchesScalarReference)
+{
+    constexpr int numPoints = 100;
+    float srcXs[numPoints], srcYs[numPoints];
+    float dstXs[numPoints], dstYs[numPoints];
+    float refXs[numPoints], refYs[numPoints];
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        srcXs[i] = (float) (i - 50);
+        srcYs[i] = (float) (i * 2 - 100);
+    }
+
+    constexpr float sx = 1.5f, shx = 0.3f, tx = 7.0f;
+    constexpr float shy = -0.2f, sy = 2.0f, ty = -3.0f;
+
+    AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, shx, tx, shy, sy, ty);
+    transformScalar (srcXs, srcYs, refXs, refYs, numPoints, sx, shx, tx, shy, sy, ty);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_NEAR (dstXs[i], refXs[i], 1.0e-4f);
+        EXPECT_NEAR (dstYs[i], refYs[i], 1.0e-4f);
+    }
+}
+
+TEST_F (AffineTransformOpsTests, ThreePointsUsesScalarTail)
+{
+    // 3 points: the SIMD path handles 4 at a time, so all 3 go through scalar tail
+    constexpr int numPoints = 3;
+    const float srcXs[numPoints] = { 1.0f, 2.0f, 3.0f };
+    const float srcYs[numPoints] = { 4.0f, 5.0f, 6.0f };
+    float dstXs[numPoints] = {};
+    float dstYs[numPoints] = {};
+    float refXs[numPoints] = {};
+    float refYs[numPoints] = {};
+
+    constexpr float sx = 2.0f, shx = 0.5f, tx = 1.0f;
+    constexpr float shy = -0.5f, sy = 3.0f, ty = -1.0f;
+
+    AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, shx, tx, shy, sy, ty);
+    transformScalar (srcXs, srcYs, refXs, refYs, numPoints, sx, shx, tx, shy, sy, ty);
+
+    for (int i = 0; i < numPoints; ++i)
+    {
+        EXPECT_NEAR (dstXs[i], refXs[i], 1.0e-5f);
+        EXPECT_NEAR (dstYs[i], refYs[i], 1.0e-5f);
+    }
+}
diff --git a/tests/yup_simd/yup_ColorVectorOperations.cpp b/tests/yup_simd/yup_ColorVectorOperations.cpp
index 26e5cc270..8e0a24280 100644
--- a/tests/yup_simd/yup_ColorVectorOperations.cpp
+++ b/tests/yup_simd/yup_ColorVectorOperations.cpp
@@ -25,6 +25,10 @@
 
 using namespace yup;
 
+// ==============================================================================
+// premultiplyARGB tests
+// ==============================================================================
+
 TEST (ColorVectorOpsTests, PremultiplyARGBMatchesScalarReference)
 {
     uint32_t pixels[] = { 0x80ff8040u, 0xff010203u, 0x00010203u };
@@ -36,6 +40,53 @@ TEST (ColorVectorOpsTests, PremultiplyARGBMatchesScalarReference)
     EXPECT_EQ (pixels[2], 0x00000000u);
 }
 
+TEST (ColorVectorOpsTests, PremultiplyARGBFullAlphaUnchanged)
+{
+    // alpha=0xff: all channels should be unchanged
+    uint32_t pixels[] = { 0xff112233u };
+    ColorVectorOperations::premultiplyARGB (pixels, 1);
+    EXPECT_EQ (pixels[0], 0xff112233u);
+}
+
+TEST (ColorVectorOpsTests, PremultiplyARGBZeroAlphaBlacksOut)
+{
+    // alpha=0x00: all channels should become 0
+    uint32_t pixels[] = { 0x00aabbccu };
+    ColorVectorOperations::premultiplyARGB (pixels, 1);
+    EXPECT_EQ (pixels[0], 0x00000000u);
+}
+
+TEST (ColorVectorOpsTests, PremultiplyARGBHalfAlpha)
+{
+    // alpha=0x80 (128): channels should be approximately halved
+    // premultiply: (c * 128 + 127) / 255
+    uint32_t pixels[] = { 0x80ffu << 16 | 0x80u << 8 | 0x80u };
+    pixels[0] = (0x80u << 24) | (0xffu << 16) | (0x80u << 8) | 0x80u;
+    const uint32_t alpha = 0x80u;
+    const uint32_t expectedRed = (0xffu * alpha + 127u) / 255u;
+    const uint32_t expectedGreen = (0x80u * alpha + 127u) / 255u;
+    const uint32_t expectedBlue = (0x80u * alpha + 127u) / 255u;
+
+    ColorVectorOperations::premultiplyARGB (pixels, 1);
+
+    EXPECT_EQ ((pixels[0] >> 24) & 0xffu, alpha);
+    EXPECT_EQ ((pixels[0] >> 16) & 0xffu, expectedRed);
+    EXPECT_EQ ((pixels[0] >> 8) & 0xffu, expectedGreen);
+    EXPECT_EQ (pixels[0] & 0xffu, expectedBlue);
+}
+
+TEST (ColorVectorOpsTests, PremultiplyARGBZeroPixels)
+{
+    // Should not write to output buffer
+    uint32_t pixels[] = { 0xdeadbeefu };
+    ColorVectorOperations::premultiplyARGB (pixels, 0);
+    EXPECT_EQ (pixels[0], 0xdeadbeefu);
+}
+
+// ==============================================================================
+// premultiplyRGBA tests
+// ==============================================================================
+
 TEST (ColorVectorOpsTests, PremultiplyRGBAMatchesScalarReference)
 {
     uint8 pixels[] = { 255, 128, 64, 128, 1, 2, 3, 255, 1, 2, 3, 0 };
@@ -56,6 +107,37 @@ TEST (ColorVectorOpsTests, PremultiplyRGBAMatchesScalarReference)
     EXPECT_EQ (pixels[11], 0);
 }
 
+TEST (ColorVectorOpsTests, PremultiplyRGBAFullAlphaUnchanged)
+{
+    uint8 pixels[] = { 100, 150, 200, 255 };
+    ColorVectorOperations::premultiplyRGBA (pixels, 1);
+    EXPECT_EQ (pixels[0], 100);
+    EXPECT_EQ (pixels[1], 150);
+    EXPECT_EQ (pixels[2], 200);
+    EXPECT_EQ (pixels[3], 255);
+}
+
+TEST (ColorVectorOpsTests, PremultiplyRGBAZeroAlphaBlacksOut)
+{
+    uint8 pixels[] = { 100, 150, 200, 0 };
+    ColorVectorOperations::premultiplyRGBA (pixels, 1);
+    EXPECT_EQ (pixels[0], 0);
+    EXPECT_EQ (pixels[1], 0);
+    EXPECT_EQ (pixels[2], 0);
+    EXPECT_EQ (pixels[3], 0);
+}
+
+TEST (ColorVectorOpsTests, PremultiplyRGBAZeroPixels)
+{
+    uint8 pixels[] = { 99, 98, 97, 96 };
+    ColorVectorOperations::premultiplyRGBA (pixels, 0);
+    EXPECT_EQ (pixels[0], 99);
+}
+
+// ==============================================================================
+// convertARGBtoRGBA tests
+// ==============================================================================
+
 TEST (ColorVectorOpsTests, ConvertARGBtoRGBA)
 {
     const uint32_t argb[] = { 0x12345678u, 0xaabbccddu };
@@ -77,6 +159,43 @@ TEST (ColorVectorOpsTests, ConvertARGBtoRGBAInPlace)
     EXPECT_EQ (pixels[1], 0xbbccddaau);
 }
 
+TEST (ColorVectorOpsTests, ConvertARGBtoRGBAZeroPixels)
+{
+    const uint32_t argb[] = { 0x12345678u };
+    uint32_t rgba[] = { 0xdeadbeefu };
+    ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 0);
+    EXPECT_EQ (rgba[0], 0xdeadbeefu);
+}
+
+TEST (ColorVectorOpsTests, ConvertARGBtoRGBASinglePixel)
+{
+    // 0xAARRGGBB -> 0xRRGGBBAA
+    const uint32_t argb[] = { 0xaabbccddu };
+    uint32_t rgba[] = { 0u };
+    ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 1);
+    EXPECT_EQ (rgba[0], 0xbbccddaau);
+}
+
+TEST (ColorVectorOpsTests, ConvertARGBtoRGBAAllZero)
+{
+    const uint32_t argb[] = { 0x00000000u };
+    uint32_t rgba[] = { 0xffffffffu };
+    ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 1);
+    EXPECT_EQ (rgba[0], 0x00000000u);
+}
+
+TEST (ColorVectorOpsTests, ConvertARGBtoRGBAAllOnes)
+{
+    const uint32_t argb[] = { 0xffffffffu };
+    uint32_t rgba[] = { 0u };
+    ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 1);
+    EXPECT_EQ (rgba[0], 0xffffffffu);
+}
+
+// ==============================================================================
+// convertGrayscaleToRGBA tests
+// ==============================================================================
+
 TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBA)
 {
     const uint8 gray[] = { 0, 127, 255 };
@@ -98,6 +217,29 @@ TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBA)
     EXPECT_EQ (rgba[11], 255);
 }
 
+TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBAAlwaysOpaqueAlpha)
+{
+    const uint8 gray[] = { 128 };
+    uint8 rgba[4] = {};
+    ColorVectorOperations::convertGrayscaleToRGBA (gray, rgba, 1);
+    EXPECT_EQ (rgba[0], 128);
+    EXPECT_EQ (rgba[1], 128);
+    EXPECT_EQ (rgba[2], 128);
+    EXPECT_EQ (rgba[3], 255);
+}
+
+TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBAZeroPixels)
+{
+    const uint8 gray[] = { 99 };
+    uint8 rgba[4] = { 1, 2, 3, 4 };
+    ColorVectorOperations::convertGrayscaleToRGBA (gray, rgba, 0);
+    EXPECT_EQ (rgba[0], 1);
+}
+
+// ==============================================================================
+// convertRGBToRGBA tests
+// ==============================================================================
+
 TEST (ColorVectorOpsTests, ConvertRGBToRGBA)
 {
     const uint8 rgb[] = { 1, 2, 3, 4, 5, 6 };
@@ -115,6 +257,29 @@ TEST (ColorVectorOpsTests, ConvertRGBToRGBA)
     EXPECT_EQ (rgba[7], 255);
 }
 
+TEST (ColorVectorOpsTests, ConvertRGBToRGBAAlwaysOpaqueAlpha)
+{
+    const uint8 rgb[] = { 10, 20, 30 };
+    uint8 rgba[4] = {};
+    ColorVectorOperations::convertRGBToRGBA (rgb, rgba, 1);
+    EXPECT_EQ (rgba[0], 10);
+    EXPECT_EQ (rgba[1], 20);
+    EXPECT_EQ (rgba[2], 30);
+    EXPECT_EQ (rgba[3], 255);
+}
+
+TEST (ColorVectorOpsTests, ConvertRGBToRGBAZeroPixels)
+{
+    const uint8 rgb[] = { 99, 98, 97 };
+    uint8 rgba[4] = { 1, 2, 3, 4 };
+    ColorVectorOperations::convertRGBToRGBA (rgb, rgba, 0);
+    EXPECT_EQ (rgba[0], 1);
+}
+
+// ==============================================================================
+// lerpRows tests
+// ==============================================================================
+
 TEST (ColorVectorOpsTests, LerpRowsMatchesScalarReference)
 {
     const float rowA[] = { 0.0f, 0.25f, 0.5f, 0.75f, 1.0f, 0.5f, 0.0f, 1.0f };
@@ -126,3 +291,67 @@ TEST (ColorVectorOpsTests, LerpRowsMatchesScalarReference)
     for (int i = 0; i < 8; ++i)
         EXPECT_NEAR (dst[i], rowA[i] + (rowB[i] - rowA[i]) * 0.25f, 1.0e-5f);
 }
+
+TEST (ColorVectorOpsTests, LerpRowsAtZeroGivesRowA)
+{
+    const float rowA[] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    const float rowB[] = { 5.0f, 6.0f, 7.0f, 8.0f };
+    float dst[4] = {};
+
+    ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.0f, 1);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_NEAR (dst[i], rowA[i], 1.0e-5f);
+}
+
+TEST (ColorVectorOpsTests, LerpRowsAtOneGivesRowB)
+{
+    const float rowA[] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    const float rowB[] = { 5.0f, 6.0f, 7.0f, 8.0f };
+    float dst[4] = {};
+
+    ColorVectorOperations::lerpRows (rowA, rowB, dst, 1.0f, 1);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_NEAR (dst[i], rowB[i], 1.0e-5f);
+}
+
+TEST (ColorVectorOpsTests, LerpRowsAtHalfIsMidpoint)
+{
+    const float rowA[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+    const float rowB[] = { 1.0f, 1.0f, 1.0f, 1.0f };
+    float dst[4] = {};
+
+    ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.5f, 1);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_NEAR (dst[i], 0.5f, 1.0e-5f);
+}
+
+TEST (ColorVectorOpsTests, LerpRowsZeroPixels)
+{
+    const float rowA[] = { 1.0f, 1.0f, 1.0f, 1.0f };
+    const float rowB[] = { 2.0f, 2.0f, 2.0f, 2.0f };
+    float dst[4] = { 9.0f, 9.0f, 9.0f, 9.0f };
+
+    ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.5f, 0);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (dst[i], 9.0f);
+}
+
+TEST (ColorVectorOpsTests, LerpRowsMultiplePixels)
+{
+    // 4 pixels = 16 floats
+    float rowA[16], rowB[16], dst[16];
+    for (int i = 0; i < 16; ++i)
+    {
+        rowA[i] = (float) i;
+        rowB[i] = (float) (i + 16);
+    }
+
+    ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.5f, 4);
+
+    for (int i = 0; i < 16; ++i)
+        EXPECT_NEAR (dst[i], rowA[i] + (rowB[i] - rowA[i]) * 0.5f, 1.0e-4f);
+}
diff --git a/tests/yup_simd/yup_ComplexVectorOperations.cpp b/tests/yup_simd/yup_ComplexVectorOperations.cpp
index f4c578729..94527514e 100644
--- a/tests/yup_simd/yup_ComplexVectorOperations.cpp
+++ b/tests/yup_simd/yup_ComplexVectorOperations.cpp
@@ -23,6 +23,8 @@
 
 #include <yup_simd/yup_simd.h>
 
+#include <cmath>
+
 using namespace yup;
 
 class ComplexVectorOperationsTests : public ::testing::Test
@@ -41,6 +43,10 @@ class ComplexVectorOperationsTests : public ::testing::Test
     }
 };
 
+// ==============================================================================
+// multiply tests
+// ==============================================================================
+
 TEST_F (ComplexVectorOperationsTests, MultiplyMatchesScalarReference)
 {
     constexpr int complexPairs = 7;
@@ -57,6 +63,70 @@ TEST_F (ComplexVectorOperationsTests, MultiplyMatchesScalarReference)
         EXPECT_NEAR (actual[i], expected[i], 1.0e-5f);
 }
 
+TEST_F (ComplexVectorOperationsTests, MultiplySinglePair)
+{
+    // (1 + 2i) * (3 + 4i) = (1*3 - 2*4) + (1*4 + 2*3)i = -5 + 10i
+    const float a[] = { 1.0f, 2.0f };
+    const float b[] = { 3.0f, 4.0f };
+    float result[2] = {};
+
+    ComplexVectorOperations::multiply (result, a, b, 1);
+
+    EXPECT_NEAR (result[0], -5.0f, 1.0e-5f);
+    EXPECT_NEAR (result[1], 10.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, MultiplyByOne)
+{
+    // Multiplying by (1 + 0i) should leave values unchanged
+    const float a[] = { 3.0f, 4.0f, -1.0f, 2.0f };
+    const float one[] = { 1.0f, 0.0f, 1.0f, 0.0f };
+    float result[4] = {};
+
+    ComplexVectorOperations::multiply (result, a, one, 2);
+
+    EXPECT_NEAR (result[0], a[0], 1.0e-5f);
+    EXPECT_NEAR (result[1], a[1], 1.0e-5f);
+    EXPECT_NEAR (result[2], a[2], 1.0e-5f);
+    EXPECT_NEAR (result[3], a[3], 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, MultiplyByConjugateGivesModulusSq)
+{
+    // (a + bi) * (a - bi) = a^2 + b^2 (real, zero imaginary)
+    const float a[] = { 3.0f, 4.0f };
+    const float conj[] = { 3.0f, -4.0f };
+    float result[2] = {};
+
+    ComplexVectorOperations::multiply (result, a, conj, 1);
+
+    EXPECT_NEAR (result[0], 25.0f, 1.0e-5f); // 3^2 + 4^2 = 25
+    EXPECT_NEAR (result[1], 0.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, MultiplyLargeBufferMatchesReference)
+{
+    constexpr int complexPairs = 20;
+    float a[complexPairs * 2], b[complexPairs * 2];
+    float expected[complexPairs * 2], actual[complexPairs * 2];
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+    {
+        a[i] = (float) (i - complexPairs) * 0.1f;
+        b[i] = (float) (complexPairs - i) * 0.2f;
+    }
+
+    multiplyReference (expected, a, b, complexPairs);
+    ComplexVectorOperations::multiply (actual, a, b, complexPairs);
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        EXPECT_NEAR (actual[i], expected[i], 1.0e-4f);
+}
+
+// ==============================================================================
+// multiplyAccumulate tests
+// ==============================================================================
+
 TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateMatchesScalarReference)
 {
     constexpr int complexPairs = 5;
@@ -80,6 +150,66 @@ TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateMatchesScalarReference)
         EXPECT_NEAR (actual[i], expected[i], 1.0e-5f);
 }
 
+TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateSinglePair)
+{
+    // Start with (10 + 5i), add (1+2i)*(3+4i) = -5+10i
+    // Result: (10-5) + (5+10)i = 5 + 15i
+    const float a[] = { 1.0f, 2.0f };
+    const float b[] = { 3.0f, 4.0f };
+    float y[] = { 10.0f, 5.0f };
+
+    ComplexVectorOperations::multiplyAccumulate (a, b, y, 1);
+
+    EXPECT_NEAR (y[0], 5.0f, 1.0e-5f);
+    EXPECT_NEAR (y[1], 15.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateWithZeroInputLeavesDestUnchanged)
+{
+    // a = (0 + 0i), any b: product is (0 + 0i) -> dest unchanged
+    const float a[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+    const float b[] = { 3.0f, 4.0f, 1.0f, 2.0f };
+    float y[] = { 7.0f, 8.0f, 9.0f, 10.0f };
+
+    ComplexVectorOperations::multiplyAccumulate (a, b, y, 2);
+
+    EXPECT_NEAR (y[0], 7.0f, 1.0e-5f);
+    EXPECT_NEAR (y[1], 8.0f, 1.0e-5f);
+    EXPECT_NEAR (y[2], 9.0f, 1.0e-5f);
+    EXPECT_NEAR (y[3], 10.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateLargeBuffer)
+{
+    constexpr int complexPairs = 16;
+    float a[complexPairs * 2], b[complexPairs * 2];
+    float initY[complexPairs * 2];
+    float expected[complexPairs * 2], actual[complexPairs * 2];
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+    {
+        a[i] = (float) i * 0.1f;
+        b[i] = (float) (complexPairs * 2 - i) * 0.1f;
+        initY[i] = (float) i * 0.5f;
+        expected[i] = initY[i];
+        actual[i] = initY[i];
+    }
+
+    float product[complexPairs * 2] = {};
+    multiplyReference (product, a, b, complexPairs);
+    for (int i = 0; i < complexPairs * 2; ++i)
+        expected[i] += product[i];
+
+    ComplexVectorOperations::multiplyAccumulate (a, b, actual, complexPairs);
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        EXPECT_NEAR (actual[i], expected[i], 1.0e-4f);
+}
+
+// ==============================================================================
+// powerSpectrum tests
+// ==============================================================================
+
 TEST_F (ComplexVectorOperationsTests, PowerSpectrumMatchesScalarReference)
 {
     constexpr int complexPairs = 6;
@@ -91,3 +221,71 @@ TEST_F (ComplexVectorOperationsTests, PowerSpectrumMatchesScalarReference)
     for (int i = 0; i < complexPairs; ++i)
         EXPECT_NEAR (actual[i], data[i * 2] * data[i * 2] + data[i * 2 + 1] * data[i * 2 + 1], 1.0e-5f);
 }
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumSinglePair)
+{
+    // (3, 4): magnitude^2 = 9 + 16 = 25
+    const float data[] = { 3.0f, 4.0f };
+    float result = 0.0f;
+
+    ComplexVectorOperations::powerSpectrum (&result, data, 1);
+
+    EXPECT_NEAR (result, 25.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumZeroPairs)
+{
+    const float data[] = { 3.0f, 4.0f };
+    float result = 99.0f;
+
+    ComplexVectorOperations::powerSpectrum (&result, data, 0);
+
+    // Should not have written anything
+    EXPECT_FLOAT_EQ (result, 99.0f);
+}
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumUnitVector)
+{
+    // (1 + 0i): power = 1
+    const float data[] = { 1.0f, 0.0f };
+    float result = 0.0f;
+    ComplexVectorOperations::powerSpectrum (&result, data, 1);
+    EXPECT_NEAR (result, 1.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumZeroVector)
+{
+    // (0 + 0i): power = 0
+    const float data[] = { 0.0f, 0.0f };
+    float result = 99.0f;
+    ComplexVectorOperations::powerSpectrum (&result, data, 1);
+    EXPECT_NEAR (result, 0.0f, 1.0e-5f);
+}
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumLargeBuffer)
+{
+    constexpr int complexPairs = 20;
+    float data[complexPairs * 2];
+    float actual[complexPairs] = {};
+
+    for (int i = 0; i < complexPairs * 2; ++i)
+        data[i] = (float) (i + 1) * 0.5f;
+
+    ComplexVectorOperations::powerSpectrum (actual, data, complexPairs);
+
+    for (int i = 0; i < complexPairs; ++i)
+        EXPECT_NEAR (actual[i], data[i * 2] * data[i * 2] + data[i * 2 + 1] * data[i * 2 + 1], 1.0e-4f);
+}
+
+TEST_F (ComplexVectorOperationsTests, PowerSpectrumPythagoreanTriple)
+{
+    // (3, 4): 3^2+4^2=25, (5, 12): 5^2+12^2=169, (8, 15): 8^2+15^2=289
+    const float data[] = { 3.0f, 4.0f, 5.0f, 12.0f, 8.0f, 15.0f };
+    float actual[3] = {};
+
+    ComplexVectorOperations::powerSpectrum (actual, data, 3);
+
+    EXPECT_NEAR (actual[0], 25.0f, 1.0e-4f);
+    EXPECT_NEAR (actual[1], 169.0f, 1.0e-4f);
+    EXPECT_NEAR (actual[2], 289.0f, 1.0e-4f);
+}
diff --git a/tests/yup_simd/yup_SIMDRegister.cpp b/tests/yup_simd/yup_SIMDRegister.cpp
index 3eb2b4d60..43ea61de3 100644
--- a/tests/yup_simd/yup_SIMDRegister.cpp
+++ b/tests/yup_simd/yup_SIMDRegister.cpp
@@ -25,6 +25,51 @@
 
 using namespace yup;
 
+// ==============================================================================
+// Float4 tests
+// ==============================================================================
+
+TEST (SIMDRegisterTests, Float4DefaultConstructorIsZero)
+{
+    Float4 r;
+    float stored[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], 0.0f);
+}
+
+TEST (SIMDRegisterTests, Float4ZeroHelperIsAllZero)
+{
+    const auto r = Float4::zero();
+    float stored[4] = {};
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], 0.0f);
+}
+
+TEST (SIMDRegisterTests, Float4BroadcastFillsAllLanes)
+{
+    const auto r = Float4::broadcast (3.14f);
+    float stored[4] = {};
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], 3.14f);
+}
+
+TEST (SIMDRegisterTests, Float4ElementAccessOperator)
+{
+    const float values[4] = { 10.0f, 20.0f, 30.0f, 40.0f };
+    const auto r = Float4::loadUnaligned (values);
+
+    EXPECT_FLOAT_EQ (r[0], 10.0f);
+    EXPECT_FLOAT_EQ (r[1], 20.0f);
+    EXPECT_FLOAT_EQ (r[2], 30.0f);
+    EXPECT_FLOAT_EQ (r[3], 40.0f);
+}
+
 TEST (SIMDRegisterTests, Float4ArithmeticAndHorizontalOps)
 {
     const float aValues[4] = { 1.0f, -2.0f, 3.0f, -4.0f };
@@ -44,6 +89,138 @@ TEST (SIMDRegisterTests, Float4ArithmeticAndHorizontalOps)
     EXPECT_FLOAT_EQ (a.abs().hmax(), 4.0f);
 }
 
+TEST (SIMDRegisterTests, Float4Subtraction)
+{
+    const float aValues[4] = { 10.0f, 20.0f, 30.0f, 40.0f };
+    const float bValues[4] = { 1.0f, 3.0f, 5.0f, 7.0f };
+
+    const auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    const auto result = a - b;
+
+    float stored[4] = {};
+    result.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], aValues[i] - bValues[i]);
+}
+
+TEST (SIMDRegisterTests, Float4Division)
+{
+    const float aValues[4] = { 4.0f, 9.0f, 16.0f, 25.0f };
+    const float bValues[4] = { 2.0f, 3.0f, 4.0f, 5.0f };
+
+    const auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    const auto result = a / b;
+
+    float stored[4] = {};
+    result.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], aValues[i] / bValues[i]);
+}
+
+TEST (SIMDRegisterTests, Float4CompoundAddAssign)
+{
+    const float aValues[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    const float bValues[4] = { 10.0f, 20.0f, 30.0f, 40.0f };
+
+    auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    a += b;
+
+    float stored[4] = {};
+    a.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], aValues[i] + bValues[i]);
+}
+
+TEST (SIMDRegisterTests, Float4CompoundMulAssign)
+{
+    const float aValues[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    const float bValues[4] = { 2.0f, 3.0f, 4.0f, 5.0f };
+
+    auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    a *= b;
+
+    float stored[4] = {};
+    a.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], aValues[i] * bValues[i]);
+}
+
+TEST (SIMDRegisterTests, Float4ElementwiseMin)
+{
+    const float aValues[4] = { 1.0f, 5.0f, 2.0f, 4.0f };
+    const float bValues[4] = { 3.0f, 2.0f, 4.0f, 1.0f };
+
+    const auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    const auto result = a.min (b);
+
+    float stored[4] = {};
+    result.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], std::min (aValues[i], bValues[i]));
+}
+
+TEST (SIMDRegisterTests, Float4ElementwiseMax)
+{
+    const float aValues[4] = { 1.0f, 5.0f, 2.0f, 4.0f };
+    const float bValues[4] = { 3.0f, 2.0f, 4.0f, 1.0f };
+
+    const auto a = Float4::loadUnaligned (aValues);
+    const auto b = Float4::loadUnaligned (bValues);
+    const auto result = a.max (b);
+
+    float stored[4] = {};
+    result.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], std::max (aValues[i], bValues[i]));
+}
+
+TEST (SIMDRegisterTests, Float4AbsOnMixedValues)
+{
+    const float values[4] = { -1.0f, 2.0f, -3.0f, 4.0f };
+    const auto r = Float4::loadUnaligned (values);
+    const auto result = r.abs();
+
+    float stored[4] = {};
+    result.storeUnaligned (stored);
+
+    EXPECT_FLOAT_EQ (stored[0], 1.0f);
+    EXPECT_FLOAT_EQ (stored[1], 2.0f);
+    EXPECT_FLOAT_EQ (stored[2], 3.0f);
+    EXPECT_FLOAT_EQ (stored[3], 4.0f);
+}
+
+TEST (SIMDRegisterTests, Float4SumAllLanes)
+{
+    const float values[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    const auto r = Float4::loadUnaligned (values);
+    EXPECT_FLOAT_EQ (r.sum(), 10.0f);
+}
+
+TEST (SIMDRegisterTests, Float4SumWithNegatives)
+{
+    const float values[4] = { 1.0f, -1.0f, 2.0f, -2.0f };
+    const auto r = Float4::loadUnaligned (values);
+    EXPECT_FLOAT_EQ (r.sum(), 0.0f);
+}
+
+TEST (SIMDRegisterTests, Float4HmaxFindsLargest)
+{
+    const float values[4] = { -3.0f, 7.0f, 1.0f, -10.0f };
+    const auto r = Float4::loadUnaligned (values);
+    EXPECT_FLOAT_EQ (r.hmax(), 7.0f);
+}
+
 TEST (SIMDRegisterTests, MulAddAndLoadStoreRoundTrip)
 {
     alignas (16) const float base[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
@@ -57,3 +234,217 @@ TEST (SIMDRegisterTests, MulAddAndLoadStoreRoundTrip)
     for (int i = 0; i < 4; ++i)
         EXPECT_FLOAT_EQ (stored[i], base[i] + mul[i] * add[i]);
 }
+
+TEST (SIMDRegisterTests, Float4LoadFromPointerConstructor)
+{
+    const float values[4] = { 5.0f, 6.0f, 7.0f, 8.0f };
+    const Float4 r (values);
+
+    float stored[4] = {};
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (stored[i], values[i]);
+}
+
+TEST (SIMDRegisterTests, Float4ScalarConstructorBroadcasts)
+{
+    const Float4 r (42.0f);
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (r[i], 42.0f);
+}
+
+// ==============================================================================
+// Float8 tests
+// ==============================================================================
+
+TEST (SIMDRegisterTests, Float8DefaultConstructorIsZero)
+{
+    Float8 r;
+    for (int i = 0; i < 8; ++i)
+        EXPECT_FLOAT_EQ (r[i], 0.0f);
+}
+
+TEST (SIMDRegisterTests, Float8BroadcastAndArithmetic)
+{
+    const Float8 a = Float8::broadcast (2.0f);
+    const Float8 b = Float8::broadcast (3.0f);
+    const auto result = a * b;
+
+    for (int i = 0; i < 8; ++i)
+        EXPECT_FLOAT_EQ (result[i], 6.0f);
+}
+
+TEST (SIMDRegisterTests, Float8LoadStoreRoundTrip)
+{
+    float values[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
+    const auto r = Float8::loadUnaligned (values);
+
+    float stored[8] = {};
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 8; ++i)
+        EXPECT_FLOAT_EQ (stored[i], values[i]);
+}
+
+TEST (SIMDRegisterTests, Float8Sum)
+{
+    float values[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
+    const auto r = Float8::loadUnaligned (values);
+    EXPECT_FLOAT_EQ (r.sum(), 36.0f);
+}
+
+TEST (SIMDRegisterTests, Float8Hmax)
+{
+    float values[8] = { 1.0f, -5.0f, 3.0f, 9.0f, 2.0f, -3.0f, 4.0f, 0.0f };
+    const auto r = Float8::loadUnaligned (values);
+    EXPECT_FLOAT_EQ (r.hmax(), 9.0f);
+}
+
+TEST (SIMDRegisterTests, Float8AbsNegatesAll)
+{
+    float values[8];
+    for (int i = 0; i < 8; ++i)
+        values[i] = (i % 2 == 0) ? -(float) (i + 1) : (float) (i + 1);
+
+    const auto r = Float8::loadUnaligned (values).abs();
+    for (int i = 0; i < 8; ++i)
+        EXPECT_FLOAT_EQ (r[i], (float) (i + 1));
+}
+
+// ==============================================================================
+// Double2 tests
+// ==============================================================================
+
+TEST (SIMDRegisterTests, Double2DefaultConstructorIsZero)
+{
+    Double2 r;
+    for (int i = 0; i < 2; ++i)
+        EXPECT_DOUBLE_EQ (r[i], 0.0);
+}
+
+TEST (SIMDRegisterTests, Double2ArithmeticOperations)
+{
+    const double aValues[2] = { 1.5, -2.5 };
+    const double bValues[2] = { 3.0, 4.0 };
+
+    const auto a = Double2::loadUnaligned (aValues);
+    const auto b = Double2::loadUnaligned (bValues);
+
+    const auto sum = a + b;
+    const auto diff = a - b;
+    const auto prod = a * b;
+    const auto quot = a / b;
+
+    EXPECT_DOUBLE_EQ (sum[0], aValues[0] + bValues[0]);
+    EXPECT_DOUBLE_EQ (sum[1], aValues[1] + bValues[1]);
+    EXPECT_DOUBLE_EQ (diff[0], aValues[0] - bValues[0]);
+    EXPECT_DOUBLE_EQ (diff[1], aValues[1] - bValues[1]);
+    EXPECT_DOUBLE_EQ (prod[0], aValues[0] * bValues[0]);
+    EXPECT_DOUBLE_EQ (prod[1], aValues[1] * bValues[1]);
+    EXPECT_DOUBLE_EQ (quot[0], aValues[0] / bValues[0]);
+    EXPECT_DOUBLE_EQ (quot[1], aValues[1] / bValues[1]);
+}
+
+TEST (SIMDRegisterTests, Double2BroadcastAndSum)
+{
+    const auto r = Double2::broadcast (3.14);
+    EXPECT_NEAR (r.sum(), 6.28, 1.0e-12);
+}
+
+TEST (SIMDRegisterTests, Double2MinMax)
+{
+    const double aValues[2] = { 1.0, 5.0 };
+    const double bValues[2] = { 3.0, 2.0 };
+
+    const auto a = Double2::loadUnaligned (aValues);
+    const auto b = Double2::loadUnaligned (bValues);
+
+    const auto minResult = a.min (b);
+    const auto maxResult = a.max (b);
+
+    EXPECT_DOUBLE_EQ (minResult[0], 1.0);
+    EXPECT_DOUBLE_EQ (minResult[1], 2.0);
+    EXPECT_DOUBLE_EQ (maxResult[0], 3.0);
+    EXPECT_DOUBLE_EQ (maxResult[1], 5.0);
+}
+
+// ==============================================================================
+// Double4 tests
+// ==============================================================================
+
+TEST (SIMDRegisterTests, Double4LoadStoreRoundTrip)
+{
+    const double values[4] = { 1.1, 2.2, 3.3, 4.4 };
+    const auto r = Double4::loadUnaligned (values);
+
+    double stored[4] = {};
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_DOUBLE_EQ (stored[i], values[i]);
+}
+
+TEST (SIMDRegisterTests, Double4MulAdd)
+{
+    const double baseValues[4] = { 1.0, 2.0, 3.0, 4.0 };
+    const double mulValues[4] = { 2.0, 3.0, 4.0, 5.0 };
+    const double addValues[4] = { 10.0, 20.0, 30.0, 40.0 };
+
+    const auto base = Double4::loadUnaligned (baseValues);
+    const auto mul = Double4::loadUnaligned (mulValues);
+    const auto add = Double4::loadUnaligned (addValues);
+    const auto result = base.mulAdd (mul, add);
+
+    for (int i = 0; i < 4; ++i)
+        EXPECT_DOUBLE_EQ (result[i], baseValues[i] + mulValues[i] * addValues[i]);
+}
+
+TEST (SIMDRegisterTests, Double4Sum)
+{
+    const double values[4] = { 1.0, 2.0, 3.0, 4.0 };
+    const auto r = Double4::loadUnaligned (values);
+    EXPECT_DOUBLE_EQ (r.sum(), 10.0);
+}
+
+TEST (SIMDRegisterTests, Double4Hmax)
+{
+    const double values[4] = { -1.0, 3.5, 2.0, -4.0 };
+    const auto r = Double4::loadUnaligned (values);
+    EXPECT_DOUBLE_EQ (r.hmax(), 3.5);
+}
+
+// ==============================================================================
+// General SIMDRegister<T,N> with non-power-of-two N
+// ==============================================================================
+
+TEST (SIMDRegisterTests, SIMDRegisterFloat5PartialBatch)
+{
+    using Float5 = SIMDRegister<float, 5>;
+    const float values[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    const auto r = Float5::loadUnaligned (values);
+
+    float stored[5] = {};
+    r.storeUnaligned (stored);
+
+    for (int i = 0; i < 5; ++i)
+        EXPECT_FLOAT_EQ (stored[i], values[i]);
+}
+
+TEST (SIMDRegisterTests, SIMDRegisterFloat5Sum)
+{
+    using Float5 = SIMDRegister<float, 5>;
+    const float values[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
+    const auto r = Float5::loadUnaligned (values);
+    EXPECT_FLOAT_EQ (r.sum(), 15.0f);
+}
+
+TEST (SIMDRegisterTests, SIMDRegisterFloat1ElementAccess)
+{
+    using Float1 = SIMDRegister<float, 1>;
+    const float value = 7.0f;
+    const Float1 r (value);
+    EXPECT_FLOAT_EQ (r[0], value);
+    EXPECT_FLOAT_EQ (r.sum(), value);
+    EXPECT_FLOAT_EQ (r.hmax(), value);
+}
diff --git a/tests/yup_simd/yup_Vec.cpp b/tests/yup_simd/yup_Vec.cpp
new file mode 100644
index 000000000..61ad33e0d
--- /dev/null
+++ b/tests/yup_simd/yup_Vec.cpp
@@ -0,0 +1,358 @@
+/*
+  ==============================================================================
+
+   This file is part of the YUP library.
+   Copyright (c) 2026 - kunitoki@gmail.com
+
+   YUP is an open source library subject to open-source licensing.
+
+   The code included in this file is provided under the terms of the ISC license
+   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
+   to use, copy, modify, and/or distribute this software for any purpose with or
+   without fee is hereby granted provided that the above copyright notice and
+   this permission notice appear in all copies.
+
+   YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include <gtest/gtest.h>
+
+#include <yup_simd/yup_simd.h>
+
+#include <cmath>
+
+using namespace yup;
+
+// ==============================================================================
+// Vec2f tests
+// ==============================================================================
+
+TEST (Vec2fTests, DefaultConstructorZeroInitialises)
+{
+    const Vec2f v;
+    EXPECT_FLOAT_EQ (v.x, 0.0f);
+    EXPECT_FLOAT_EQ (v.y, 0.0f);
+}
+
+TEST (Vec2fTests, ParameterisedConstructor)
+{
+    const Vec2f v (3.0f, -7.5f);
+    EXPECT_FLOAT_EQ (v.x, 3.0f);
+    EXPECT_FLOAT_EQ (v.y, -7.5f);
+}
+
+TEST (Vec2fTests, Addition)
+{
+    const Vec2f a (1.0f, 2.0f);
+    const Vec2f b (3.0f, 4.0f);
+    const Vec2f r = a + b;
+    EXPECT_FLOAT_EQ (r.x, 4.0f);
+    EXPECT_FLOAT_EQ (r.y, 6.0f);
+}
+
+TEST (Vec2fTests, AdditionWithNegatives)
+{
+    const Vec2f a (5.0f, -3.0f);
+    const Vec2f b (-2.0f, 7.0f);
+    const Vec2f r = a + b;
+    EXPECT_FLOAT_EQ (r.x, 3.0f);
+    EXPECT_FLOAT_EQ (r.y, 4.0f);
+}
+
+TEST (Vec2fTests, AdditionIdentity)
+{
+    const Vec2f a (10.0f, -5.0f);
+    const Vec2f zero;
+    const Vec2f r = a + zero;
+    EXPECT_FLOAT_EQ (r.x, a.x);
+    EXPECT_FLOAT_EQ (r.y, a.y);
+}
+
+TEST (Vec2fTests, ScalarMultiplication)
+{
+    const Vec2f v (2.0f, -4.0f);
+    const Vec2f r = v * 3.0f;
+    EXPECT_FLOAT_EQ (r.x, 6.0f);
+    EXPECT_FLOAT_EQ (r.y, -12.0f);
+}
+
+TEST (Vec2fTests, ScalarMultiplicationByZero)
+{
+    const Vec2f v (99.0f, -77.0f);
+    const Vec2f r = v * 0.0f;
+    EXPECT_FLOAT_EQ (r.x, 0.0f);
+    EXPECT_FLOAT_EQ (r.y, 0.0f);
+}
+
+TEST (Vec2fTests, ScalarMultiplicationByOne)
+{
+    const Vec2f v (3.5f, -1.5f);
+    const Vec2f r = v * 1.0f;
+    EXPECT_FLOAT_EQ (r.x, v.x);
+    EXPECT_FLOAT_EQ (r.y, v.y);
+}
+
+TEST (Vec2fTests, DotProduct)
+{
+    const Vec2f a (1.0f, 0.0f);
+    const Vec2f b (0.0f, 1.0f);
+    EXPECT_FLOAT_EQ (a.dot (b), 0.0f); // orthogonal
+}
+
+TEST (Vec2fTests, DotProductCollinear)
+{
+    const Vec2f a (1.0f, 0.0f);
+    EXPECT_FLOAT_EQ (a.dot (a), 1.0f);
+}
+
+TEST (Vec2fTests, DotProductGeneral)
+{
+    // (1,2)·(3,4) = 1*3 + 2*4 = 11
+    const Vec2f a (1.0f, 2.0f);
+    const Vec2f b (3.0f, 4.0f);
+    EXPECT_FLOAT_EQ (a.dot (b), 11.0f);
+}
+
+TEST (Vec2fTests, LengthOf345Triangle)
+{
+    const Vec2f v (3.0f, 4.0f);
+    EXPECT_NEAR (v.length(), 5.0f, 1.0e-5f);
+}
+
+TEST (Vec2fTests, LengthOfZeroVector)
+{
+    const Vec2f v;
+    EXPECT_NEAR (v.length(), 0.0f, 1.0e-7f);
+}
+
+TEST (Vec2fTests, LengthOfUnitVector)
+{
+    const Vec2f v (1.0f, 0.0f);
+    EXPECT_NEAR (v.length(), 1.0f, 1.0e-7f);
+}
+
+TEST (Vec2fTests, NormalizedHasUnitLength)
+{
+    const Vec2f v (3.0f, 4.0f);
+    const Vec2f n = v.normalized();
+    EXPECT_NEAR (n.length(), 1.0f, 1.0e-6f);
+}
+
+TEST (Vec2fTests, NormalizedDirection)
+{
+    const Vec2f v (3.0f, 4.0f);
+    const Vec2f n = v.normalized();
+    EXPECT_NEAR (n.x, 0.6f, 1.0e-6f);
+    EXPECT_NEAR (n.y, 0.8f, 1.0e-6f);
+}
+
+TEST (Vec2fTests, NormalizedZeroVectorReturnsZero)
+{
+    const Vec2f v;
+    const Vec2f n = v.normalized();
+    EXPECT_FLOAT_EQ (n.x, 0.0f);
+    EXPECT_FLOAT_EQ (n.y, 0.0f);
+}
+
+// ==============================================================================
+// Vec4f tests
+// ==============================================================================
+
+TEST (Vec4fTests, DefaultConstructorZeroInitialises)
+{
+    const Vec4f v;
+    EXPECT_FLOAT_EQ (v.r, 0.0f);
+    EXPECT_FLOAT_EQ (v.g, 0.0f);
+    EXPECT_FLOAT_EQ (v.b, 0.0f);
+    EXPECT_FLOAT_EQ (v.a, 0.0f);
+}
+
+TEST (Vec4fTests, ParameterisedConstructor)
+{
+    const Vec4f v (0.1f, 0.2f, 0.3f, 1.0f);
+    EXPECT_FLOAT_EQ (v.r, 0.1f);
+    EXPECT_FLOAT_EQ (v.g, 0.2f);
+    EXPECT_FLOAT_EQ (v.b, 0.3f);
+    EXPECT_FLOAT_EQ (v.a, 1.0f);
+}
+
+TEST (Vec4fTests, Addition)
+{
+    const Vec4f a (0.1f, 0.2f, 0.3f, 0.4f);
+    const Vec4f b (0.4f, 0.3f, 0.2f, 0.1f);
+    const Vec4f r = a + b;
+    EXPECT_NEAR (r.r, 0.5f, 1.0e-6f);
+    EXPECT_NEAR (r.g, 0.5f, 1.0e-6f);
+    EXPECT_NEAR (r.b, 0.5f, 1.0e-6f);
+    EXPECT_NEAR (r.a, 0.5f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, AdditionIdentity)
+{
+    const Vec4f a (1.0f, 2.0f, 3.0f, 4.0f);
+    const Vec4f zero;
+    const Vec4f r = a + zero;
+    EXPECT_NEAR (r.r, a.r, 1.0e-6f);
+    EXPECT_NEAR (r.g, a.g, 1.0e-6f);
+    EXPECT_NEAR (r.b, a.b, 1.0e-6f);
+    EXPECT_NEAR (r.a, a.a, 1.0e-6f);
+}
+
+TEST (Vec4fTests, ScalarMultiplication)
+{
+    const Vec4f v (1.0f, 2.0f, 3.0f, 4.0f);
+    const Vec4f r = v * 2.0f;
+    EXPECT_NEAR (r.r, 2.0f, 1.0e-6f);
+    EXPECT_NEAR (r.g, 4.0f, 1.0e-6f);
+    EXPECT_NEAR (r.b, 6.0f, 1.0e-6f);
+    EXPECT_NEAR (r.a, 8.0f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, ScalarMultiplicationByZero)
+{
+    const Vec4f v (9.0f, 8.0f, 7.0f, 6.0f);
+    const Vec4f r = v * 0.0f;
+    EXPECT_NEAR (r.r, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (r.g, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (r.b, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (r.a, 0.0f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, ScalarMultiplicationByOne)
+{
+    const Vec4f v (5.0f, 6.0f, 7.0f, 8.0f);
+    const Vec4f r = v * 1.0f;
+    EXPECT_NEAR (r.r, v.r, 1.0e-6f);
+    EXPECT_NEAR (r.g, v.g, 1.0e-6f);
+    EXPECT_NEAR (r.b, v.b, 1.0e-6f);
+    EXPECT_NEAR (r.a, v.a, 1.0e-6f);
+}
+
+TEST (Vec4fTests, ElementWiseMultiplication)
+{
+    const Vec4f a (2.0f, 3.0f, 4.0f, 5.0f);
+    const Vec4f b (1.5f, 2.0f, 0.5f, 0.25f);
+    const Vec4f r = a * b;
+    EXPECT_NEAR (r.r, 3.0f, 1.0e-6f);
+    EXPECT_NEAR (r.g, 6.0f, 1.0e-6f);
+    EXPECT_NEAR (r.b, 2.0f, 1.0e-6f);
+    EXPECT_NEAR (r.a, 1.25f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, ElementWiseMultiplicationByZeroVec)
+{
+    const Vec4f a (3.0f, 4.0f, 5.0f, 6.0f);
+    const Vec4f zero;
+    const Vec4f r = a * zero;
+    EXPECT_NEAR (r.r, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (r.g, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (r.b, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (r.a, 0.0f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, LerpAtZeroReturnsThis)
+{
+    const Vec4f a (0.0f, 0.0f, 0.0f, 1.0f);
+    const Vec4f b (1.0f, 1.0f, 1.0f, 1.0f);
+    const Vec4f r = a.lerp (b, 0.0f);
+    EXPECT_NEAR (r.r, a.r, 1.0e-5f);
+    EXPECT_NEAR (r.g, a.g, 1.0e-5f);
+    EXPECT_NEAR (r.b, a.b, 1.0e-5f);
+    EXPECT_NEAR (r.a, a.a, 1.0e-5f);
+}
+
+TEST (Vec4fTests, LerpAtOneReturnsOther)
+{
+    const Vec4f a (0.0f, 0.0f, 0.0f, 1.0f);
+    const Vec4f b (1.0f, 0.5f, 0.25f, 0.75f);
+    const Vec4f r = a.lerp (b, 1.0f);
+    EXPECT_NEAR (r.r, b.r, 1.0e-5f);
+    EXPECT_NEAR (r.g, b.g, 1.0e-5f);
+    EXPECT_NEAR (r.b, b.b, 1.0e-5f);
+    EXPECT_NEAR (r.a, b.a, 1.0e-5f);
+}
+
+TEST (Vec4fTests, LerpAtHalfIsMidpoint)
+{
+    const Vec4f a (0.0f, 0.0f, 0.0f, 0.0f);
+    const Vec4f b (2.0f, 4.0f, 6.0f, 8.0f);
+    const Vec4f r = a.lerp (b, 0.5f);
+    EXPECT_NEAR (r.r, 1.0f, 1.0e-5f);
+    EXPECT_NEAR (r.g, 2.0f, 1.0e-5f);
+    EXPECT_NEAR (r.b, 3.0f, 1.0e-5f);
+    EXPECT_NEAR (r.a, 4.0f, 1.0e-5f);
+}
+
+TEST (Vec4fTests, LerpQuarterPoint)
+{
+    const Vec4f a (0.0f, 0.0f, 0.0f, 0.0f);
+    const Vec4f b (4.0f, 8.0f, 12.0f, 16.0f);
+    const Vec4f r = a.lerp (b, 0.25f);
+    EXPECT_NEAR (r.r, 1.0f, 1.0e-5f);
+    EXPECT_NEAR (r.g, 2.0f, 1.0e-5f);
+    EXPECT_NEAR (r.b, 3.0f, 1.0e-5f);
+    EXPECT_NEAR (r.a, 4.0f, 1.0e-5f);
+}
+
+TEST (Vec4fTests, PremultipliedFullAlpha)
+{
+    // alpha=1: RGB unchanged
+    const Vec4f v (0.5f, 0.25f, 0.75f, 1.0f);
+    const Vec4f p = v.premultiplied();
+    EXPECT_NEAR (p.r, 0.5f, 1.0e-6f);
+    EXPECT_NEAR (p.g, 0.25f, 1.0e-6f);
+    EXPECT_NEAR (p.b, 0.75f, 1.0e-6f);
+    EXPECT_NEAR (p.a, 1.0f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, PremultipliedZeroAlpha)
+{
+    // alpha=0: all channels become 0
+    const Vec4f v (1.0f, 1.0f, 1.0f, 0.0f);
+    const Vec4f p = v.premultiplied();
+    EXPECT_NEAR (p.r, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (p.g, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (p.b, 0.0f, 1.0e-6f);
+    EXPECT_NEAR (p.a, 0.0f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, PremultipliedHalfAlpha)
+{
+    // alpha=0.5: each channel halved
+    const Vec4f v (0.8f, 0.6f, 0.4f, 0.5f);
+    const Vec4f p = v.premultiplied();
+    EXPECT_NEAR (p.r, 0.4f, 1.0e-6f);
+    EXPECT_NEAR (p.g, 0.3f, 1.0e-6f);
+    EXPECT_NEAR (p.b, 0.2f, 1.0e-6f);
+    EXPECT_NEAR (p.a, 0.5f, 1.0e-6f);
+}
+
+TEST (Vec4fTests, LoadAndStore)
+{
+    const float src[] = { 1.0f, 2.0f, 3.0f, 4.0f };
+    const Vec4f v = Vec4f::load (src);
+    EXPECT_FLOAT_EQ (v.r, 1.0f);
+    EXPECT_FLOAT_EQ (v.g, 2.0f);
+    EXPECT_FLOAT_EQ (v.b, 3.0f);
+    EXPECT_FLOAT_EQ (v.a, 4.0f);
+
+    float dst[4] = {};
+    v.store (dst);
+    EXPECT_FLOAT_EQ (dst[0], 1.0f);
+    EXPECT_FLOAT_EQ (dst[1], 2.0f);
+    EXPECT_FLOAT_EQ (dst[2], 3.0f);
+    EXPECT_FLOAT_EQ (dst[3], 4.0f);
+}
+
+TEST (Vec4fTests, LoadRoundTrip)
+{
+    const float original[] = { 7.5f, -3.25f, 0.0f, 1.0f };
+    float result[4] = {};
+    Vec4f::load (original).store (result);
+    for (int i = 0; i < 4; ++i)
+        EXPECT_FLOAT_EQ (result[i], original[i]);
+}

From abb2c637f54b6586ae19d97773dc7df03e7293f6 Mon Sep 17 00:00:00 2001
From: kunitoki <kunitoki@gmail.com>
Date: Sat, 30 May 2026 08:52:40 +0200
Subject: [PATCH 4/5] Fix issue

---
 tests/yup_simd/yup_FloatVectorOperations.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/yup_simd/yup_FloatVectorOperations.cpp b/tests/yup_simd/yup_FloatVectorOperations.cpp
index 748a81a8d..13219d79a 100644
--- a/tests/yup_simd/yup_FloatVectorOperations.cpp
+++ b/tests/yup_simd/yup_FloatVectorOperations.cpp
@@ -462,6 +462,7 @@ TEST_F (FloatVectorOperationsTests, FloatToDoubleAndBack)
 TEST_F (FloatVectorOperationsTests, MidSideEncodeDecodeRoundTrip)
 {
     Random& random = Random::getSystemRandom();
+    constexpr float roundTripTolerance = 1.0e-4f;
 
     for (int i = 500; --i >= 0;)
     {
@@ -503,8 +504,8 @@ TEST_F (FloatVectorOperationsTests, MidSideEncodeDecodeRoundTrip)
 
         for (int j = 0; j < num; ++j)
         {
-            EXPECT_NEAR (leftOut[j], left[j], std::numeric_limits<float>::epsilon());
-            EXPECT_NEAR (rightOut[j], right[j], std::numeric_limits<float>::epsilon());
+            EXPECT_NEAR (leftOut[j], left[j], roundTripTolerance);
+            EXPECT_NEAR (rightOut[j], right[j], roundTripTolerance);
         }
     }
 }

From 8d19bd3b30f3056680ea417b1a813c33aad2b086 Mon Sep 17 00:00:00 2001
From: kunitoki <kunitoki@gmail.com>
Date: Sat, 30 May 2026 21:47:10 +0200
Subject: [PATCH 5/5] More numeric stability

---
 .../buffers/yup_ComplexVectorOperations.cpp   | 36 ++++++++++---------
 tests/yup_dsp/yup_BiquadCascade.cpp           |  6 ++--
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp
index b80fcc185..bd51937a7 100644
--- a/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp
+++ b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp
@@ -30,20 +30,22 @@ void complexMultiply (const float* __restrict a, const float* __restrict b, floa
 
 #if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS
     constexpr int simdWidth = 4;
+    const __m256 signs = _mm256_set_ps (1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f);
+
     for (; i <= complexPairs - simdWidth; i += simdWidth)
     {
         const int idx = i * 2;
 
-        __m256 av = _mm256_loadu_ps (a + idx);
-        __m256 bv = _mm256_loadu_ps (b + idx);
-
-        const __m256 aShuffled = _mm256_permute_ps (av, _MM_SHUFFLE (2, 3, 0, 1));
-        const __m256 bShuffled = _mm256_permute_ps (bv, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m256 av = _mm256_loadu_ps (a + idx);
+        const __m256 bv = _mm256_loadu_ps (b + idx);
 
-        __m256 realPart = _mm256_fmsub_ps (av, bv, _mm256_mul_ps (aShuffled, bShuffled));
-        __m256 imagPart = _mm256_fmadd_ps (av, bShuffled, _mm256_mul_ps (aShuffled, bv));
+        const __m256 aReal = _mm256_permute_ps (av, _MM_SHUFFLE (2, 2, 0, 0));
+        const __m256 aImag = _mm256_permute_ps (av, _MM_SHUFFLE (3, 3, 1, 1));
+        const __m256 bSwapped = _mm256_permute_ps (bv, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m256 realProducts = _mm256_mul_ps (aReal, bv);
+        const __m256 imagProducts = _mm256_mul_ps (aImag, bSwapped);
 
-        __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010);
+        __m256 interleaved = _mm256_fmadd_ps (imagProducts, signs, realProducts);
 
         if (accumulate)
             interleaved = _mm256_add_ps (_mm256_loadu_ps (y + idx), interleaved);
@@ -53,20 +55,22 @@ void complexMultiply (const float* __restrict a, const float* __restrict b, floa
 
 #elif YUP_USE_SSE_INTRINSICS
     constexpr int simdWidth = 2;
+    const __m128 signs = _mm_set_ps (1.0f, -1.0f, 1.0f, -1.0f);
+
     for (; i <= complexPairs - simdWidth; i += simdWidth)
     {
         const int idx = i * 2;
 
-        __m128 av = _mm_loadu_ps (a + idx);
-        __m128 bv = _mm_loadu_ps (b + idx);
-
-        const __m128 aShuffled = _mm_shuffle_ps (av, av, _MM_SHUFFLE (2, 3, 0, 1));
-        const __m128 bShuffled = _mm_shuffle_ps (bv, bv, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m128 av = _mm_loadu_ps (a + idx);
+        const __m128 bv = _mm_loadu_ps (b + idx);
 
-        __m128 realPart = _mm_sub_ps (_mm_mul_ps (av, bv), _mm_mul_ps (aShuffled, bShuffled));
-        __m128 imagPart = _mm_add_ps (_mm_mul_ps (av, bShuffled), _mm_mul_ps (aShuffled, bv));
+        const __m128 aReal = _mm_shuffle_ps (av, av, _MM_SHUFFLE (2, 2, 0, 0));
+        const __m128 aImag = _mm_shuffle_ps (av, av, _MM_SHUFFLE (3, 3, 1, 1));
+        const __m128 bSwapped = _mm_shuffle_ps (bv, bv, _MM_SHUFFLE (2, 3, 0, 1));
+        const __m128 realProducts = _mm_mul_ps (aReal, bv);
+        const __m128 imagProducts = _mm_mul_ps (aImag, bSwapped);
 
-        __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart);
+        __m128 interleaved = _mm_add_ps (realProducts, _mm_mul_ps (imagProducts, signs));
 
         if (accumulate)
             interleaved = _mm_add_ps (_mm_loadu_ps (y + idx), interleaved);
diff --git a/tests/yup_dsp/yup_BiquadCascade.cpp b/tests/yup_dsp/yup_BiquadCascade.cpp
index 86f393b5f..e9ae8bc02 100644
--- a/tests/yup_dsp/yup_BiquadCascade.cpp
+++ b/tests/yup_dsp/yup_BiquadCascade.cpp
@@ -398,11 +398,11 @@ TEST_F (BiquadCascadeFilterTests, StabilityCheck)
     auto coeffs = FilterDesigner<double>::designRbjLowpass (5000.0, 50.0, sampleRate);
     cascadeFloat.setSectionCoefficients (0, coeffs);
 
-    // Process white noise-like signal
+    // Process deterministic white noise-like signal
     std::vector<float> noiseInput (blockSize);
-    WhiteNoise noise;
+    WhiteNoise noise (12345);
     for (int i = 0; i < blockSize; ++i)
-        noiseInput[i] = noise.getNextSample();
+        noiseInput[i] = 0.1f * noise.getNextSample();
 
     cascadeFloat.processBlock (noiseInput.data(), outputData.data(), blockSize);