From 8a05f10c9321602920ceeb1c19103aef51a831ff Mon Sep 17 00:00:00 2001 From: kunitoki Date: Fri, 29 May 2026 10:17:59 +0200 Subject: [PATCH 1/5] Initial checkin of yup_simd --- cmake/platforms/yup_emscripten.cmake | 2 + .../convolution/yup_PartitionedConvolver.cpp | 105 +---- .../yup_dsp/metering/yup_LevelProcessor.cpp | 18 +- modules/yup_dsp/utilities/yup_DspMath.cpp | 73 +--- modules/yup_graphics/graphics/yup_Color.h | 22 + modules/yup_graphics/imaging/yup_Image.cpp | 41 +- .../primitives/yup_AffineTransform.h | 6 + modules/yup_graphics/yup_graphics.h | 3 +- .../buffers/yup_AffineTransformOperations.cpp | 63 +++ .../buffers/yup_AffineTransformOperations.h | 38 ++ .../buffers/yup_ColorVectorOperations.cpp | 113 +++++ .../buffers/yup_ColorVectorOperations.h | 53 +++ .../buffers/yup_ComplexVectorOperations.cpp | 172 ++++++++ .../buffers/yup_ComplexVectorOperations.h | 62 +++ .../buffers/yup_FloatVectorOperations.cpp | 179 ++++++++ .../buffers/yup_FloatVectorOperations.h | 18 +- modules/yup_simd/types/yup_EigenAdapters.h | 43 ++ modules/yup_simd/types/yup_SIMDRegister.h | 390 ++++++++++++++++++ modules/yup_simd/types/yup_Vec.h | 137 ++++++ modules/yup_simd/yup_simd.cpp | 3 + modules/yup_simd/yup_simd.h | 22 +- tests/yup_graphics/yup_Image.cpp | 54 +++ tests/yup_simd.cpp | 4 + .../yup_AffineTransformOperations.cpp | 50 +++ tests/yup_simd/yup_ColorVectorOperations.cpp | 128 ++++++ .../yup_simd/yup_ComplexVectorOperations.cpp | 93 +++++ tests/yup_simd/yup_FloatVectorOperations.cpp | 49 +++ tests/yup_simd/yup_SIMDRegister.cpp | 59 +++ thirdparty/xsimd/xsimd.h | 45 ++ 29 files changed, 1827 insertions(+), 218 deletions(-) create mode 100644 modules/yup_simd/buffers/yup_AffineTransformOperations.cpp create mode 100644 modules/yup_simd/buffers/yup_AffineTransformOperations.h create mode 100644 modules/yup_simd/buffers/yup_ColorVectorOperations.cpp create mode 100644 modules/yup_simd/buffers/yup_ColorVectorOperations.h create mode 100644 modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp create mode 100644 modules/yup_simd/buffers/yup_ComplexVectorOperations.h create mode 100644 modules/yup_simd/types/yup_EigenAdapters.h create mode 100644 modules/yup_simd/types/yup_SIMDRegister.h create mode 100644 modules/yup_simd/types/yup_Vec.h create mode 100644 tests/yup_simd/yup_AffineTransformOperations.cpp create mode 100644 tests/yup_simd/yup_ColorVectorOperations.cpp create mode 100644 tests/yup_simd/yup_ComplexVectorOperations.cpp create mode 100644 tests/yup_simd/yup_SIMDRegister.cpp create mode 100644 thirdparty/xsimd/xsimd.h diff --git a/cmake/platforms/yup_emscripten.cmake b/cmake/platforms/yup_emscripten.cmake index 4710c7eca..c1f36d3d7 100644 --- a/cmake/platforms/yup_emscripten.cmake +++ b/cmake/platforms/yup_emscripten.cmake @@ -16,3 +16,5 @@ # DISCLAIMED. # # ============================================================================== + +add_compile_options (-msimd128) diff --git a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp index ac027993e..7e1a0a47b 100644 --- a/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp +++ b/modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp @@ -22,109 +22,6 @@ namespace yup { -//============================================================================== - -/** Performs Y += A * B (complex multiply accumulate) where A, B, and Y - are arrays of interleaved complex values [real, imag, real, imag...]. - - @param A pointer to input complex array - @param B pointer to input complex array - @param Y pointer to output complex array (accumulated) - @param complexPairs number of complex pairs (not number of floats!) -*/ -static void complexMultiplyAccumulate (const float* __restrict A, const float* __restrict B, float* __restrict Y, int complexPairs) noexcept -{ - int i = 0; - -#if YUP_USE_AVX_INTRINSICS - constexpr int simdWidth = 4; // AVX2 path: process 4 complex pairs (8 floats) at a time - for (; i <= complexPairs - simdWidth; i += simdWidth) - { - const int idx = i * 2; - - __m256 a = _mm256_loadu_ps (A + idx); - __m256 b = _mm256_loadu_ps (B + idx); - __m256 y = _mm256_loadu_ps (Y + idx); - - const __m256 a_shuffled = _mm256_permute_ps (a, _MM_SHUFFLE (2, 3, 0, 1)); - const __m256 b_shuffled = _mm256_permute_ps (b, _MM_SHUFFLE (2, 3, 0, 1)); - - __m256 realPart = _mm256_fmsub_ps (a, b, _mm256_mul_ps (a_shuffled, b_shuffled)); - __m256 imagPart = _mm256_fmadd_ps (a, b_shuffled, _mm256_mul_ps (a_shuffled, b)); - - const __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010); - - y = _mm256_add_ps (y, interleaved); - _mm256_storeu_ps (Y + idx, y); - } - -#elif YUP_USE_SSE_INTRINSICS - constexpr int simdWidth = 2; // SSE path: process 2 complex pairs (4 floats) at a time - for (; i <= complexPairs - simdWidth; i += simdWidth) - { - const int idx = i * 2; - - __m128 a = _mm_loadu_ps (A + idx); - __m128 b = _mm_loadu_ps (B + idx); - __m128 y = _mm_loadu_ps (Y + idx); - - const __m128 a_shuffled = _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); - const __m128 b_shuffled = _mm_shuffle_ps (b, b, _MM_SHUFFLE (2, 3, 0, 1)); - - __m128 realPart = _mm_sub_ps (_mm_mul_ps (a, b), _mm_mul_ps (a_shuffled, b_shuffled)); - __m128 imagPart = _mm_add_ps (_mm_mul_ps (a, b_shuffled), _mm_mul_ps (a_shuffled, b)); - - const __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart); - - y = _mm_add_ps (y, interleaved); - _mm_storeu_ps (Y + idx, y); - } - -#elif YUP_USE_ARM_NEON - constexpr int simdWidth = 4; - for (; i <= complexPairs - simdWidth; i += simdWidth) - { - const int idx = i * 2; - - float32x4x2_t a = vld2q_f32 (A + idx); - float32x4x2_t b = vld2q_f32 (B + idx); - float32x4x2_t y = vld2q_f32 (Y + idx); - - float32x4_t ar = a.val[0], ai = a.val[1]; - float32x4_t br = b.val[0], bi = b.val[1]; - float32x4_t yr = y.val[0], yi = y.val[1]; - - float32x4_t real = vmulq_f32 (ar, br); - real = vfmsq_f32 (real, ai, bi); - float32x4_t imag = vmulq_f32 (ar, bi); - imag = vfmaq_f32 (imag, ai, br); - - yr = vaddq_f32 (yr, real); - yi = vaddq_f32 (yi, imag); - - float32x4x2_t out = { yr, yi }; - vst2q_f32 (Y + idx, out); // interleave back - } - -#endif - - for (; i < complexPairs; ++i) - { - const int ri = i * 2; - const int ii = ri + 1; - - const float ar = A[ri]; - const float ai = A[ii]; - const float br = B[ri]; - const float bi = B[ii]; - - Y[ri] += ar * br - ai * bi; - Y[ii] += ar * bi + ai * br; - } -} - -//============================================================================== - class PartitionedConvolver::FFTLayer { public: @@ -254,7 +151,7 @@ class PartitionedConvolver::FFTLayer const float* H = frequencyPartitions[p].data(); // fftSize_/2 gives the number of complex pairs for real FFT - complexMultiplyAccumulate (X, H, frequencyBuffer.data(), fftSize / 2); + ComplexVectorOperations::multiplyAccumulate (X, H, frequencyBuffer.data(), fftSize / 2); // Move to next older spectrum xIndex++; diff --git a/modules/yup_dsp/metering/yup_LevelProcessor.cpp b/modules/yup_dsp/metering/yup_LevelProcessor.cpp index fca19e17e..f83954102 100644 --- a/modules/yup_dsp/metering/yup_LevelProcessor.cpp +++ b/modules/yup_dsp/metering/yup_LevelProcessor.cpp @@ -73,16 +73,7 @@ void LevelProcessor::processPeak (const float* samples, int numSamples, float& p jassert (samples != nullptr); jassert (numSamples >= 0); - float peak = 0.0f; - - for (int i = 0; i < numSamples; ++i) - { - const float absSample = std::abs (samples[i]); - if (absSample > peak) - peak = absSample; - } - - peakOut = peak; + peakOut = FloatVectorOperations::maxAbs (samples, numSamples); } void LevelProcessor::processRMS (const float* samples, int numSamples, float& rmsOut) noexcept @@ -93,12 +84,7 @@ void LevelProcessor::processRMS (const float* samples, int numSamples, float& rm if (rmsBuffer.empty() || rmsBufferSize == 0) { // Fallback: simple RMS if buffer not initialized - double sumSquares = 0.0; - for (int i = 0; i < numSamples; ++i) - { - const float sample = samples[i]; - sumSquares += sample * sample; - } + const double sumSquares = FloatVectorOperations::sumSquares (samples, numSamples); rmsOut = numSamples > 0 ? static_cast (std::sqrt (sumSquares / numSamples)) : 0.0f; return; } diff --git a/modules/yup_dsp/utilities/yup_DspMath.cpp b/modules/yup_dsp/utilities/yup_DspMath.cpp index b14bade41..8b31c31c3 100644 --- a/modules/yup_dsp/utilities/yup_DspMath.cpp +++ b/modules/yup_dsp/utilities/yup_DspMath.cpp @@ -27,78 +27,7 @@ namespace yup template <> float dotProduct (const float* __restrict a, const float* __restrict b, std::size_t length) noexcept { - float accumulation = 0.0f; - -#if YUP_ENABLE_VDSP - vDSP_dotpr (a, 1, b, 1, &accumulation, length); - -#else - std::size_t i = 0; - -#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS - __m256 vacc = _mm256_setzero_ps(); - for (; i + 8 <= length; i += 8) - { - __m256 va = _mm256_loadu_ps (a + i); - __m256 vb = _mm256_loadu_ps (b + i); - vacc = _mm256_fmadd_ps (va, vb, vacc); - } - __m128 low = _mm256_castps256_ps128 (vacc); - __m128 high = _mm256_extractf128_ps (vacc, 1); - __m128 vsum = _mm_add_ps (low, high); - vsum = _mm_hadd_ps (vsum, vsum); - vsum = _mm_hadd_ps (vsum, vsum); - accumulation += _mm_cvtss_f32 (vsum); - -#elif YUP_USE_SSE_INTRINSICS - __m128 vacc = _mm_setzero_ps(); -#if YUP_USE_FMA_INTRINSICS - for (; i + 4 <= length; i += 4) - { - __m128 va = _mm_loadu_ps (a + i); - __m128 vb = _mm_loadu_ps (b + i); - vacc = _mm_fmadd_ps (va, vb, vacc); - } -#else - for (; i + 4 <= length; i += 4) - { - __m128 va = _mm_loadu_ps (a + i); - __m128 vb = _mm_loadu_ps (b + i); - vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb)); - } -#endif - __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1)); - __m128 sums = _mm_add_ps (vacc, shuf); - shuf = _mm_movehl_ps (shuf, sums); - sums = _mm_add_ss (sums, shuf); - accumulation += _mm_cvtss_f32 (sums); - -#elif YUP_USE_ARM_NEON - float32x4_t vacc = vdupq_n_f32 (0.0f); - for (; i + 4 <= length; i += 4) - { - float32x4_t va = vld1q_f32 (a + i); - float32x4_t vb = vld1q_f32 (b + i); - vacc = vmlaq_f32 (vacc, va, vb); - } -#if YUP_64BIT - accumulation += vaddvq_f32 (vacc); -#else - float32x2_t vlow = vget_low_f32 (vacc); - float32x2_t vhigh = vget_high_f32 (vacc); - float32x2_t vsum2 = vpadd_f32 (vlow, vhigh); - vsum2 = vpadd_f32 (vsum2, vsum2); - accumulation += vget_lane_f32 (vsum2, 0); -#endif - -#endif - - // Handle remaining samples - for (; i < length; ++i) - accumulation += a[i] * b[i]; -#endif - - return accumulation; + return FloatVectorOperations::dotProduct (a, b, length); } } // namespace yup diff --git a/modules/yup_graphics/graphics/yup_Color.h b/modules/yup_graphics/graphics/yup_Color.h index fe6b8abe5..3484d144f 100644 --- a/modules/yup_graphics/graphics/yup_Color.h +++ b/modules/yup_graphics/graphics/yup_Color.h @@ -1176,4 +1176,26 @@ class YUP_API Color }; }; +/** Converts a Color to a normalized RGBA vector. */ +inline Vec4f toVec4f (Color color) noexcept +{ + return { + color.getRedFloat(), + color.getGreenFloat(), + color.getBlueFloat(), + color.getAlphaFloat() + }; +} + +/** Converts a normalized RGBA vector to a Color. */ +inline Color toColor (Vec4f color) noexcept +{ + return { + static_cast (roundToInt (jlimit (0.0f, 1.0f, color.a) * 255.0f)), + static_cast (roundToInt (jlimit (0.0f, 1.0f, color.r) * 255.0f)), + static_cast (roundToInt (jlimit (0.0f, 1.0f, color.g) * 255.0f)), + static_cast (roundToInt (jlimit (0.0f, 1.0f, color.b) * 255.0f)) + }; +} + } // namespace yup diff --git a/modules/yup_graphics/imaging/yup_Image.cpp b/modules/yup_graphics/imaging/yup_Image.cpp index f96475cd2..abdcc623e 100644 --- a/modules/yup_graphics/imaging/yup_Image.cpp +++ b/modules/yup_graphics/imaging/yup_Image.cpp @@ -24,11 +24,6 @@ namespace yup namespace { -uint8 premultiplyComponent (uint8 component, uint8 alpha) noexcept -{ - return static_cast ((static_cast (component) * static_cast (alpha) + 127u) / 255u); -} - uint8 unpremultiplyComponent (uint8 component, uint8 alpha) noexcept { if (alpha == 0) @@ -38,16 +33,6 @@ uint8 unpremultiplyComponent (uint8 component, uint8 alpha) noexcept return static_cast (value > 255u ? 255u : value); } -void writeARGBAsRGBAPremultiplied (uint32 argb, uint8* destination) noexcept -{ - const auto alpha = static_cast ((argb >> 24) & 0xFF); - - destination[0] = premultiplyComponent (static_cast ((argb >> 16) & 0xFF), alpha); - destination[1] = premultiplyComponent (static_cast ((argb >> 8) & 0xFF), alpha); - destination[2] = premultiplyComponent (static_cast (argb & 0xFF), alpha); - destination[3] = alpha; -} - std::unique_ptr copyRGBAPremultipliedAsRGBA (const rive::Bitmap& bitmap) { const auto numPixels = static_cast (bitmap.width()) * static_cast (bitmap.height()); @@ -256,16 +241,26 @@ bool Image::createTextureIfNotPresent (GraphicsContext& context) const if (renderContext == nullptr || renderContext->impl() == nullptr) return false; - std::vector texturePixels (static_cast (width) * static_cast (height) * 4u); - auto* destination = texturePixels.data(); + const auto numPixels = static_cast (width) * static_cast (height); + std::vector texturePixels (numPixels * 4u); + + const auto sourceData = bitmapData->getRawData(); + const auto* source = sourceData.data(); - for (int y = 0; y < height; ++y) + switch (bitmapData->getPixelFormat()) { - for (int x = 0; x < width; ++x) - { - writeARGBAsRGBAPremultiplied (bitmapData->getPixel (x, y), destination); - destination += 4; - } + case PixelFormat::Grayscale: + ColorVectorOperations::convertGrayscaleToRGBA (source, texturePixels.data(), static_cast (numPixels)); + break; + + case PixelFormat::RGB: + ColorVectorOperations::convertRGBToRGBA (source, texturePixels.data(), static_cast (numPixels)); + break; + + case PixelFormat::RGBA: + std::memcpy (texturePixels.data(), source, texturePixels.size()); + ColorVectorOperations::premultiplyRGBA (texturePixels.data(), static_cast (numPixels)); + break; } texture = renderContext->impl()->makeImageTexture ( diff --git a/modules/yup_graphics/primitives/yup_AffineTransform.h b/modules/yup_graphics/primitives/yup_AffineTransform.h index 96625424a..ca264255b 100644 --- a/modules/yup_graphics/primitives/yup_AffineTransform.h +++ b/modules/yup_graphics/primitives/yup_AffineTransform.h @@ -949,6 +949,12 @@ constexpr float get (const AffineTransform& transform) noexcept static_assert (dependentFalse); } +/** Transforms separate x/y point arrays in-place using an AffineTransform. */ +inline void batchTransformPoints (float* xs, float* ys, int numPoints, const AffineTransform& transform) noexcept +{ + AffineTransformOperations::transformPoints (xs, ys, numPoints, transform.getScaleX(), transform.getShearX(), transform.getTranslateX(), transform.getShearY(), transform.getScaleY(), transform.getTranslateY()); +} + } // namespace yup #ifndef DOXYGEN diff --git a/modules/yup_graphics/yup_graphics.h b/modules/yup_graphics/yup_graphics.h index fea89c81a..089d37798 100644 --- a/modules/yup_graphics/yup_graphics.h +++ b/modules/yup_graphics/yup_graphics.h @@ -32,7 +32,7 @@ website: https://github.com/kunitoki/yup license: ISC - dependencies: yup_core rive rive_renderer + dependencies: yup_core yup_simd rive rive_renderer appleFrameworks: Metal searchpaths: native @@ -45,6 +45,7 @@ #define YUP_GRAPHICS_H_INCLUDED #include +#include #include #include diff --git a/modules/yup_simd/buffers/yup_AffineTransformOperations.cpp b/modules/yup_simd/buffers/yup_AffineTransformOperations.cpp new file mode 100644 index 000000000..6d88aa991 --- /dev/null +++ b/modules/yup_simd/buffers/yup_AffineTransformOperations.cpp @@ -0,0 +1,63 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +namespace yup +{ + +void YUP_CALLTYPE AffineTransformOperations::transformPoints (float* xs, float* ys, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept +{ + transformPoints (xs, ys, xs, ys, numPoints, sx, shx, tx, shy, sy, ty); +} + +void YUP_CALLTYPE AffineTransformOperations::transformPoints (const float* srcXs, const float* srcYs, float* dstXs, float* dstYs, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept +{ + int i = 0; + + const auto sx4 = Float4::broadcast (sx); + const auto shx4 = Float4::broadcast (shx); + const auto tx4 = Float4::broadcast (tx); + const auto shy4 = Float4::broadcast (shy); + const auto sy4 = Float4::broadcast (sy); + const auto ty4 = Float4::broadcast (ty); + + for (; i + Float4::size <= numPoints; i += Float4::size) + { + const auto x = Float4::loadUnaligned (srcXs + i); + const auto y = Float4::loadUnaligned (srcYs + i); + + const auto outX = tx4.mulAdd (sx4, x).mulAdd (shx4, y); + const auto outY = ty4.mulAdd (shy4, x).mulAdd (sy4, y); + + outX.storeUnaligned (dstXs + i); + outY.storeUnaligned (dstYs + i); + } + + for (; i < numPoints; ++i) + { + const float x = srcXs[i]; + const float y = srcYs[i]; + + dstXs[i] = sx * x + shx * y + tx; + dstYs[i] = shy * x + sy * y + ty; + } +} + +} // namespace yup diff --git a/modules/yup_simd/buffers/yup_AffineTransformOperations.h b/modules/yup_simd/buffers/yup_AffineTransformOperations.h new file mode 100644 index 000000000..ae68cb2ee --- /dev/null +++ b/modules/yup_simd/buffers/yup_AffineTransformOperations.h @@ -0,0 +1,38 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +class YUP_API AffineTransformOperations +{ +public: + /** Transforms points stored in separate x/y arrays in-place. */ + static void YUP_CALLTYPE transformPoints (float* xs, float* ys, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept; + + /** Transforms points stored in separate x/y source arrays into destination arrays. */ + static void YUP_CALLTYPE transformPoints (const float* srcXs, const float* srcYs, float* dstXs, float* dstYs, int numPoints, float sx, float shx, float tx, float shy, float sy, float ty) noexcept; +}; + +} // namespace yup diff --git a/modules/yup_simd/buffers/yup_ColorVectorOperations.cpp b/modules/yup_simd/buffers/yup_ColorVectorOperations.cpp new file mode 100644 index 000000000..dbde342f1 --- /dev/null +++ b/modules/yup_simd/buffers/yup_ColorVectorOperations.cpp @@ -0,0 +1,113 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +namespace yup +{ + +namespace +{ +static uint32 premultiplyComponent (uint32 component, uint32 alpha) noexcept +{ + return (component * alpha + 127u) / 255u; +} + +static uint32 packRGBA (uint32 red, uint32 green, uint32 blue, uint32 alpha) noexcept +{ + return red | (green << 8) | (blue << 16) | (alpha << 24); +} +} // namespace + +//============================================================================== +void YUP_CALLTYPE ColorVectorOperations::premultiplyARGB (uint32* pixels, int numPixels) noexcept +{ + for (int i = 0; i < numPixels; ++i) + { + const uint32 pixel = pixels[i]; + const uint32 alpha = (pixel >> 24) & 0xffu; + const uint32 red = premultiplyComponent ((pixel >> 16) & 0xffu, alpha); + const uint32 green = premultiplyComponent ((pixel >> 8) & 0xffu, alpha); + const uint32 blue = premultiplyComponent (pixel & 0xffu, alpha); + + pixels[i] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } +} + +void YUP_CALLTYPE ColorVectorOperations::premultiplyRGBA (uint8* pixels, int numPixels) noexcept +{ + auto* pixel = pixels; + + for (int i = 0; i < numPixels; ++i) + { + const uint32 alpha = pixel[3]; + + pixel[0] = static_cast (premultiplyComponent (pixel[0], alpha)); + pixel[1] = static_cast (premultiplyComponent (pixel[1], alpha)); + pixel[2] = static_cast (premultiplyComponent (pixel[2], alpha)); + pixel += 4; + } +} + +void YUP_CALLTYPE ColorVectorOperations::convertARGBtoRGBA (const uint32* src, uint32* dst, int numPixels) noexcept +{ + for (int i = 0; i < numPixels; ++i) + { + const uint32 pixel = src[i]; + dst[i] = ((pixel & 0x00ffffffu) << 8) | ((pixel >> 24) & 0xffu); + } +} + +void YUP_CALLTYPE ColorVectorOperations::convertGrayscaleToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept +{ + for (int i = 0; i < numPixels; ++i) + { + const uint32 value = *src++; + const auto rgba = packRGBA (value, value, value, 255u); + std::memcpy (dst, &rgba, sizeof (rgba)); + dst += 4; + } +} + +void YUP_CALLTYPE ColorVectorOperations::convertRGBToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept +{ + for (int i = 0; i < numPixels; ++i) + { + const auto rgba = packRGBA (src[0], src[1], src[2], 255u); + std::memcpy (dst, &rgba, sizeof (rgba)); + src += 3; + dst += 4; + } +} + +void YUP_CALLTYPE ColorVectorOperations::lerpRows (const float* rowA, const float* rowB, float* dst, float t, int numPixels) noexcept +{ + const auto t4 = Float4::broadcast (t); + const auto minusOne = Float4::broadcast (-1.0f); + int i = 0; + + for (; i < numPixels; ++i) + { + const auto a = Float4::loadUnaligned (rowA + i * 4); + const auto b = Float4::loadUnaligned (rowB + i * 4); + a.mulAdd (b + (a * minusOne), t4).storeUnaligned (dst + i * 4); + } +} + +} // namespace yup diff --git a/modules/yup_simd/buffers/yup_ColorVectorOperations.h b/modules/yup_simd/buffers/yup_ColorVectorOperations.h new file mode 100644 index 000000000..aabc19982 --- /dev/null +++ b/modules/yup_simd/buffers/yup_ColorVectorOperations.h @@ -0,0 +1,53 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +class YUP_API ColorVectorOperations +{ +public: + /** Premultiplies alpha in-place on a row of packed `0xAARRGGBB` pixels. */ + static void YUP_CALLTYPE premultiplyARGB (uint32* pixels, int numPixels) noexcept; + + /** Premultiplies alpha in-place on a row of RGBA byte pixels. */ + static void YUP_CALLTYPE premultiplyRGBA (uint8* pixels, int numPixels) noexcept; + + /** Converts packed `0xAARRGGBB` pixels to packed `0xRRGGBBAA` pixels. + + The source and destination buffers may alias for in-place conversion. + */ + static void YUP_CALLTYPE convertARGBtoRGBA (const uint32* src, uint32* dst, int numPixels) noexcept; + + /** Expands 8-bit grayscale pixels to RGBA byte pixels with opaque alpha. */ + static void YUP_CALLTYPE convertGrayscaleToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept; + + /** Expands RGB byte pixels to RGBA byte pixels with opaque alpha. */ + static void YUP_CALLTYPE convertRGBToRGBA (const uint8* src, uint8* dst, int numPixels) noexcept; + + /** Blends rows of float RGBA pixels using `dst = rowA + (rowB - rowA) * t`. */ + static void YUP_CALLTYPE lerpRows (const float* rowA, const float* rowB, float* dst, float t, int numPixels) noexcept; +}; + +} // namespace yup diff --git a/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp new file mode 100644 index 000000000..b80fcc185 --- /dev/null +++ b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp @@ -0,0 +1,172 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +namespace yup +{ + +namespace +{ +void complexMultiply (const float* __restrict a, const float* __restrict b, float* __restrict y, int complexPairs, bool accumulate) noexcept +{ + int i = 0; + +#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS + constexpr int simdWidth = 4; + for (; i <= complexPairs - simdWidth; i += simdWidth) + { + const int idx = i * 2; + + __m256 av = _mm256_loadu_ps (a + idx); + __m256 bv = _mm256_loadu_ps (b + idx); + + const __m256 aShuffled = _mm256_permute_ps (av, _MM_SHUFFLE (2, 3, 0, 1)); + const __m256 bShuffled = _mm256_permute_ps (bv, _MM_SHUFFLE (2, 3, 0, 1)); + + __m256 realPart = _mm256_fmsub_ps (av, bv, _mm256_mul_ps (aShuffled, bShuffled)); + __m256 imagPart = _mm256_fmadd_ps (av, bShuffled, _mm256_mul_ps (aShuffled, bv)); + + __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010); + + if (accumulate) + interleaved = _mm256_add_ps (_mm256_loadu_ps (y + idx), interleaved); + + _mm256_storeu_ps (y + idx, interleaved); + } + +#elif YUP_USE_SSE_INTRINSICS + constexpr int simdWidth = 2; + for (; i <= complexPairs - simdWidth; i += simdWidth) + { + const int idx = i * 2; + + __m128 av = _mm_loadu_ps (a + idx); + __m128 bv = _mm_loadu_ps (b + idx); + + const __m128 aShuffled = _mm_shuffle_ps (av, av, _MM_SHUFFLE (2, 3, 0, 1)); + const __m128 bShuffled = _mm_shuffle_ps (bv, bv, _MM_SHUFFLE (2, 3, 0, 1)); + + __m128 realPart = _mm_sub_ps (_mm_mul_ps (av, bv), _mm_mul_ps (aShuffled, bShuffled)); + __m128 imagPart = _mm_add_ps (_mm_mul_ps (av, bShuffled), _mm_mul_ps (aShuffled, bv)); + + __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart); + + if (accumulate) + interleaved = _mm_add_ps (_mm_loadu_ps (y + idx), interleaved); + + _mm_storeu_ps (y + idx, interleaved); + } + +#elif YUP_USE_ARM_NEON + constexpr int simdWidth = 4; + for (; i <= complexPairs - simdWidth; i += simdWidth) + { + const int idx = i * 2; + + float32x4x2_t av = vld2q_f32 (a + idx); + float32x4x2_t bv = vld2q_f32 (b + idx); + + float32x4_t real = vmulq_f32 (av.val[0], bv.val[0]); + real = vfmsq_f32 (real, av.val[1], bv.val[1]); + float32x4_t imag = vmulq_f32 (av.val[0], bv.val[1]); + imag = vfmaq_f32 (imag, av.val[1], bv.val[0]); + + if (accumulate) + { + float32x4x2_t yv = vld2q_f32 (y + idx); + real = vaddq_f32 (yv.val[0], real); + imag = vaddq_f32 (yv.val[1], imag); + } + + float32x4x2_t out = { real, imag }; + vst2q_f32 (y + idx, out); + } + +#endif + + for (; i < complexPairs; ++i) + { + const int realIndex = i * 2; + const int imagIndex = realIndex + 1; + + const float ar = a[realIndex]; + const float ai = a[imagIndex]; + const float br = b[realIndex]; + const float bi = b[imagIndex]; + + const float real = ar * br - ai * bi; + const float imag = ar * bi + ai * br; + + if (accumulate) + { + y[realIndex] += real; + y[imagIndex] += imag; + } + else + { + y[realIndex] = real; + y[imagIndex] = imag; + } + } +} +} // namespace + +//============================================================================== +void YUP_CALLTYPE ComplexVectorOperations::multiplyAccumulate (const float* a, const float* b, float* y, int complexPairs) noexcept +{ + complexMultiply (a, b, y, complexPairs, true); +} + +void YUP_CALLTYPE ComplexVectorOperations::multiply (float* dest, const float* a, const float* b, int complexPairs) noexcept +{ + complexMultiply (a, b, dest, complexPairs, false); +} + +void YUP_CALLTYPE ComplexVectorOperations::powerSpectrum (float* dest, const float* src, int complexPairs) noexcept +{ + int i = 0; + +#if YUP_USE_SSE_INTRINSICS + for (; i + 2 <= complexPairs; i += 2) + { + const __m128 values = _mm_loadu_ps (src + i * 2); + const __m128 squared = _mm_mul_ps (values, values); + alignas (16) float lanes[4]; + _mm_store_ps (lanes, squared); + dest[i] = lanes[0] + lanes[1]; + dest[i + 1] = lanes[2] + lanes[3]; + } +#elif YUP_USE_ARM_NEON + for (; i + 4 <= complexPairs; i += 4) + { + const float32x4x2_t values = vld2q_f32 (src + i * 2); + vst1q_f32 (dest + i, vmlaq_f32 (vmulq_f32 (values.val[0], values.val[0]), values.val[1], values.val[1])); + } +#endif + + for (; i < complexPairs; ++i) + { + const float real = src[i * 2]; + const float imag = src[i * 2 + 1]; + dest[i] = real * real + imag * imag; + } +} + +} // namespace yup diff --git a/modules/yup_simd/buffers/yup_ComplexVectorOperations.h b/modules/yup_simd/buffers/yup_ComplexVectorOperations.h new file mode 100644 index 000000000..ba780cd71 --- /dev/null +++ b/modules/yup_simd/buffers/yup_ComplexVectorOperations.h @@ -0,0 +1,62 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +/** Vector operations for interleaved complex buffers. + + Complex values are stored as `[real, imaginary, real, imaginary, ...]`. +*/ +class YUP_API ComplexVectorOperations +{ +public: + /** Adds the complex product of `a` and `b` to `y`. + + @param a Input complex buffer. + @param b Input complex buffer. + @param y Destination complex buffer, accumulated in-place. + @param complexPairs Number of complex pairs, not number of floats. + */ + static void YUP_CALLTYPE multiplyAccumulate (const float* a, const float* b, float* y, int complexPairs) noexcept; + + /** Multiplies two complex buffers and writes the result to `dest`. + + @param dest Destination complex buffer. + @param a Input complex buffer. + @param b Input complex buffer. + @param complexPairs Number of complex pairs, not number of floats. + */ + static void YUP_CALLTYPE multiply (float* dest, const float* a, const float* b, int complexPairs) noexcept; + + /** Computes the power spectrum of an interleaved complex buffer. + + @param dest Destination buffer receiving `real * real + imaginary * imaginary`. + @param src Input complex buffer. + @param complexPairs Number of complex pairs, not number of floats. + */ + static void YUP_CALLTYPE powerSpectrum (float* dest, const float* src, int complexPairs) noexcept; +}; + +} // namespace yup diff --git a/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp b/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp index 72e9718e2..4bf3ed780 100644 --- a/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp +++ b/modules/yup_simd/buffers/yup_FloatVectorOperations.cpp @@ -1508,6 +1508,156 @@ double findMaximum (const double* src, Size num) noexcept #endif } +template +Type sumSquares (const Type* src, Size num) noexcept +{ + if (num <= 0) + return {}; + +#if YUP_USE_VDSP_FRAMEWORK + Type result = {}; + + if constexpr (std::is_same_v) + vDSP_svesq (src, 1, &result, (vDSP_Length) num); + else + vDSP_svesqD (src, 1, &result, (vDSP_Length) num); + + return result; +#else + Type result = {}; + Size i = 0; + +#if YUP_USE_SSE_INTRINSICS || YUP_USE_ARM_NEON + using Mode = typename FloatVectorHelpers::ModeType::Mode; + + typename Mode::ParallelType accumulator = Mode::load1 (Type {}); + + for (; i + Mode::numParallel <= num; i += Mode::numParallel) + { + const auto values = Mode::loadU (src + i); + accumulator = Mode::add (accumulator, Mode::mul (values, values)); + } + + Type lanes[Mode::numParallel]; + Mode::storeU (lanes, accumulator); + + for (int lane = 0; lane < Mode::numParallel; ++lane) + result += lanes[lane]; +#endif + + for (; i < num; ++i) + result += src[i] * src[i]; + + return result; +#endif +} + +template +Type dotProduct (const Type* src1, const Type* src2, Size num) noexcept +{ + if (num <= 0) + return {}; + +#if YUP_USE_VDSP_FRAMEWORK + Type result = {}; + + if constexpr (std::is_same_v) + vDSP_dotpr (src1, 1, src2, 1, &result, (vDSP_Length) num); + else + vDSP_dotprD (src1, 1, src2, 1, &result, (vDSP_Length) num); + + return result; +#else + Type result = {}; + Size i = 0; + +#if YUP_USE_SSE_INTRINSICS || YUP_USE_ARM_NEON + using Mode = typename FloatVectorHelpers::ModeType::Mode; + + typename Mode::ParallelType accumulator = Mode::load1 (Type {}); + + for (; i + Mode::numParallel <= num; i += Mode::numParallel) + { + const auto values1 = Mode::loadU (src1 + i); + const auto values2 = Mode::loadU (src2 + i); + accumulator = Mode::add (accumulator, Mode::mul (values1, values2)); + } + + Type lanes[Mode::numParallel]; + Mode::storeU (lanes, accumulator); + + for (int lane = 0; lane < Mode::numParallel; ++lane) + result += lanes[lane]; +#endif + + for (; i < num; ++i) + result += src1[i] * src2[i]; + + return result; +#endif +} + +template +Type rms (const Type* src, Size num) noexcept +{ + return num > 0 ? static_cast (std::sqrt (sumSquares (src, num) / static_cast (num))) : Type {}; +} + +template +Type maxAbs (const Type* src, Size num) noexcept +{ + if (num <= 0) + return {}; + +#if YUP_USE_VDSP_FRAMEWORK + Type result = {}; + + if constexpr (std::is_same_v) + vDSP_maxmgv (src, 1, &result, (vDSP_Length) num); + else + vDSP_maxmgvD (src, 1, &result, (vDSP_Length) num); + + return result; +#else + Type result = {}; + Size i = 0; + +#if YUP_USE_SSE_INTRINSICS || YUP_USE_ARM_NEON + using Mode = typename FloatVectorHelpers::ModeType::Mode; + + typename Mode::ParallelType accumulator = Mode::load1 (Type {}); + + if constexpr (std::is_same_v) + { + [[maybe_unused]] FloatVectorHelpers::signMask32 signMask; + signMask.i = 0x7fffffffUL; + + const auto mask = Mode::load1 (signMask.f); + + for (; i + Mode::numParallel <= num; i += Mode::numParallel) + accumulator = Mode::max (accumulator, Mode::bit_and (Mode::loadU (src + i), mask)); + } + else + { + [[maybe_unused]] FloatVectorHelpers::signMask64 signMask; + signMask.i = 0x7fffffffffffffffULL; + + const auto mask = Mode::load1 (signMask.d); + + for (; i + Mode::numParallel <= num; i += Mode::numParallel) + accumulator = Mode::max (accumulator, Mode::bit_and (Mode::loadU (src + i), mask)); + } + + result = Mode::max (accumulator); +#endif + + for (; i < num; ++i) + result = jmax (result, static_cast (std::abs (src[i]))); + + return result; +#endif +} + template void convertFixedToFloat (float* dest, const int* src, float multiplier, Size num) noexcept { @@ -2044,6 +2194,35 @@ FloatType YUP_CALLTYPE FloatVectorOperationsBase::findMaxi return FloatVectorHelpers::findMaximum (src, numValues); } +template +FloatType YUP_CALLTYPE FloatVectorOperationsBase::sumSquares (const FloatType* src, + CountType numValues) noexcept +{ + return FloatVectorHelpers::sumSquares (src, numValues); +} + +template +FloatType YUP_CALLTYPE FloatVectorOperationsBase::rms (const FloatType* src, + CountType numValues) noexcept +{ + return FloatVectorHelpers::rms (src, numValues); +} + +template +FloatType YUP_CALLTYPE FloatVectorOperationsBase::dotProduct (const FloatType* src1, + const FloatType* src2, + CountType numValues) noexcept +{ + return FloatVectorHelpers::dotProduct (src1, src2, numValues); +} + +template +FloatType YUP_CALLTYPE FloatVectorOperationsBase::maxAbs (const FloatType* src, + CountType numValues) noexcept +{ + return FloatVectorHelpers::maxAbs (src, numValues); +} + //============================================================================== template struct FloatVectorOperationsBase; diff --git a/modules/yup_simd/buffers/yup_FloatVectorOperations.h b/modules/yup_simd/buffers/yup_FloatVectorOperations.h index 37bfe636c..5b9d824ab 100644 --- a/modules/yup_simd/buffers/yup_FloatVectorOperations.h +++ b/modules/yup_simd/buffers/yup_FloatVectorOperations.h @@ -173,6 +173,18 @@ struct FloatVectorOperationsBase /** Finds the maximum value in the given array. */ static FloatType YUP_CALLTYPE findMaximum (const FloatType* src, CountType numValues) noexcept; + + /** Returns the sum of the squared values in the given array. */ + static FloatType YUP_CALLTYPE sumSquares (const FloatType* src, CountType numValues) noexcept; + + /** Returns the root mean square of the values in the given array. */ + static FloatType YUP_CALLTYPE rms (const FloatType* src, CountType numValues) noexcept; + + /** Returns the dot product of two arrays. */ + static FloatType YUP_CALLTYPE dotProduct (const FloatType* src1, const FloatType* src2, CountType numValues) noexcept; + + /** Returns the maximum absolute value in the given array. */ + static FloatType YUP_CALLTYPE maxAbs (const FloatType* src, CountType numValues) noexcept; }; #if ! DOXYGEN @@ -201,7 +213,11 @@ struct NameForwarder : public Bases... Bases::clip..., Bases::findMinAndMax..., Bases::findMinimum..., - Bases::findMaximum...; + Bases::findMaximum..., + Bases::sumSquares..., + Bases::rms..., + Bases::dotProduct..., + Bases::maxAbs...; }; } // namespace detail diff --git a/modules/yup_simd/types/yup_EigenAdapters.h b/modules/yup_simd/types/yup_EigenAdapters.h new file mode 100644 index 000000000..3839ae4b6 --- /dev/null +++ b/modules/yup_simd/types/yup_EigenAdapters.h @@ -0,0 +1,43 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +#if YUP_MODULE_AVAILABLE_eigen + +namespace yup +{ + +inline Eigen::Matrix3f toEigenMatrix (float sx, float shx, float tx, float shy, float sy, float ty) noexcept +{ + Eigen::Matrix3f matrix; + matrix << sx, shx, tx, shy, sy, ty, 0.0f, 0.0f, 1.0f; + return matrix; +} + +inline Eigen::Vector2f toEigenVector (float x, float y) noexcept +{ + return Eigen::Vector2f (x, y); +} + +} // namespace yup + +#endif diff --git a/modules/yup_simd/types/yup_SIMDRegister.h b/modules/yup_simd/types/yup_SIMDRegister.h new file mode 100644 index 000000000..7ff43bd2a --- /dev/null +++ b/modules/yup_simd/types/yup_SIMDRegister.h @@ -0,0 +1,390 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +/** Fixed-width SIMD-style register backed by xsimd batches. + + The lane count is part of the type, which keeps code portable across platforms + where the native SIMD width differs. + + @tparam T The element type. Must be supported by xsimd batches. + @tparam N The number of elements. Must be greater than 0. +*/ +template +class SIMDRegister +{ + static_assert (N > 0, "SIMDRegister must contain at least one element"); + +public: + //============================================================================== + /** The element type of the SIMD register. */ + using ElementType = T; + + /** The number of elements in the SIMD register. */ + static constexpr int size = N; + + //============================================================================== + /** Default constructor. Initializes all elements to zero. */ + SIMDRegister() noexcept + { + for (auto& batch : data) + batch = Batch (T {}); + } + + /** Constructs a SIMD register with all elements set to the given scalar value. + + @param scalar The value to broadcast to all elements of the register. + */ + explicit SIMDRegister (T scalar) noexcept + { + for (auto& batch : data) + batch = Batch (scalar); + } + + /** Constructs a SIMD register by loading elements from a pointer. + + @param ptr The pointer to the elements to load. + */ + explicit SIMDRegister (const T* ptr) noexcept + { + load (ptr, false); + } + + //============================================================================== + /** Loads a SIMD register from a pointer, assuming the pointer is aligned. + + @param ptr The pointer to the elements to load. Must be aligned to the SIMD width. + + @returns A SIMD register containing the loaded elements. + */ + static forcedinline SIMDRegister loadAligned (const T* ptr) noexcept + { + SIMDRegister result; + result.load (ptr, true); + return result; + } + + /** Loads a SIMD register from a pointer, assuming the pointer is not aligned. + + @param ptr The pointer to the elements to load. May be unaligned. + + @returns A SIMD register containing the loaded elements. + */ + static forcedinline SIMDRegister loadUnaligned (const T* ptr) noexcept + { + SIMDRegister result; + result.load (ptr, false); + return result; + } + + //============================================================================== + /** Constructs a SIMD register with all elements set to the given scalar value. + + @param scalar The value to broadcast to all elements of the register. + + @returns A SIMD register with all elements set to the given scalar value. + */ + static forcedinline SIMDRegister broadcast (T scalar) noexcept + { + return SIMDRegister (scalar); + } + + //============================================================================== + /** Constructs a SIMD register with all elements set to zero. + + @returns A SIMD register with all elements set to zero. + */ + static forcedinline SIMDRegister zero() noexcept + { + return SIMDRegister(); + } + + //============================================================================== + /** Stores the SIMD register to a pointer, assuming the pointer is aligned. + + @param ptr The pointer to store the elements. Must be aligned to the SIMD width. + */ + forcedinline void storeAligned (T* ptr) const noexcept + { + store (ptr, true); + } + + /** Stores the SIMD register to a pointer, assuming the pointer is not aligned. + + @param ptr The pointer to store the elements. May be unaligned. + */ + forcedinline void storeUnaligned (T* ptr) const noexcept + { + store (ptr, false); + } + + //============================================================================== + /** Accesses an element of the SIMD register by index. + + @param i The index of the element to access. Must be in the range [0, N). + + @returns The value of the element at the specified index. + */ + forcedinline T operator[] (int i) const noexcept + { + jassert (i >= 0 && i < N); + + alignas (64) T lanes[nativeSize]; + data[static_cast (i) / nativeSize].store_unaligned (lanes); + return lanes[static_cast (i) % nativeSize]; + } + + //============================================================================== + /** Adds two SIMD registers element-wise. + + @param other The SIMD register to add. + + @returns A SIMD register containing the element-wise sum. + */ + forcedinline SIMDRegister operator+ (SIMDRegister other) const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = data[i] + other.data[i]; + return result; + } + + /** Subtracts two SIMD registers element-wise. + + @param other The SIMD register to subtract. + + @returns A SIMD register containing the element-wise difference. + */ + forcedinline SIMDRegister operator- (SIMDRegister other) const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = data[i] - other.data[i]; + return result; + } + + /** Multiplies two SIMD registers element-wise. + + @param other The SIMD register to multiply. + + @returns A SIMD register containing the element-wise product. + */ + forcedinline SIMDRegister operator* (SIMDRegister other) const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = data[i] * other.data[i]; + return result; + } + + /** Divides two SIMD registers element-wise. + + @param other The SIMD register to divide by. + + @returns A SIMD register containing the element-wise quotient. + */ + forcedinline SIMDRegister operator/ (SIMDRegister other) const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = data[i] / other.data[i]; + return result; + } + + /** Adds another SIMD register to this one element-wise. + + @param other The SIMD register to add. + + @returns A reference to this SIMD register after the addition. + */ + forcedinline SIMDRegister& operator+= (SIMDRegister other) noexcept + { + *this = *this + other; + return *this; + } + + /** Multiplies another SIMD register with this one element-wise. + + @param other The SIMD register to multiply. + + @returns A reference to this SIMD register after the multiplication. + */ + forcedinline SIMDRegister& operator*= (SIMDRegister other) noexcept + { + *this = *this * other; + return *this; + } + + //============================================================================== + /** Performs a fused multiply-add operation: this + (a * b). + + @param a The first SIMD register to multiply. + @param b The second SIMD register to multiply. + + @returns A SIMD register containing the result of the fused multiply-add. + */ + forcedinline SIMDRegister mulAdd (SIMDRegister a, SIMDRegister b) const noexcept + { + return *this + a * b; + } + + //============================================================================== + /** Computes the absolute value of each element in the SIMD register. + + @returns A SIMD register containing the absolute values of the elements. + */ + forcedinline SIMDRegister abs() const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = xsimd::abs (data[i]); + return result; + } + + //============================================================================== + /** Computes the element-wise minimum of two SIMD registers. + + @param other The SIMD register to compare with. + + @returns A SIMD register containing the element-wise minimum. + */ + forcedinline SIMDRegister min (SIMDRegister other) const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = xsimd::min (data[i], other.data[i]); + return result; + } + + /** Computes the element-wise maximum of two SIMD registers. + + @param other The SIMD register to compare with. + + @returns A SIMD register containing the element-wise maximum. + */ + forcedinline SIMDRegister max (SIMDRegister other) const noexcept + { + SIMDRegister result; + for (std::size_t i = 0; i < numBatches; ++i) + result.data[i] = xsimd::max (data[i], other.data[i]); + return result; + } + + //============================================================================== + /** Computes the sum of all elements in the SIMD register. + + @returns The sum of the elements. + */ + forcedinline T sum() const noexcept + { + T result {}; + for (int i = 0; i < N; ++i) + result += (*this)[i]; + return result; + } + + //============================================================================== + /** Computes the maximum value among all elements in the SIMD register. + + @returns The maximum value among the elements. + */ + forcedinline T hmax() const noexcept + { + T result = (*this)[0]; + for (int i = 1; i < N; ++i) + result = jmax (result, (*this)[i]); + return result; + } + +private: + using Batch = xsimd::batch; + + static constexpr std::size_t nativeSize = Batch::size; + static constexpr std::size_t numBatches = (static_cast (N) + nativeSize - 1) / nativeSize; + + forcedinline void load (const T* ptr, bool aligned) noexcept + { + std::size_t offset = 0; + + for (std::size_t batchIndex = 0; batchIndex < numBatches; ++batchIndex) + { + if (offset + nativeSize <= static_cast (N)) + { + data[batchIndex] = aligned ? Batch::load_aligned (ptr + offset) + : Batch::load_unaligned (ptr + offset); + } + else + { + alignas (64) T lanes[nativeSize] = {}; + const auto remaining = static_cast (N) - offset; + + for (std::size_t i = 0; i < remaining; ++i) + lanes[i] = ptr[offset + i]; + + data[batchIndex] = Batch::load_unaligned (lanes); + } + + offset += nativeSize; + } + } + + forcedinline void store (T* ptr, bool aligned) const noexcept + { + std::size_t offset = 0; + + for (std::size_t batchIndex = 0; batchIndex < numBatches; ++batchIndex) + { + if (offset + nativeSize <= static_cast (N)) + { + if (aligned) + data[batchIndex].store_aligned (ptr + offset); + else + data[batchIndex].store_unaligned (ptr + offset); + } + else + { + alignas (64) T lanes[nativeSize]; + data[batchIndex].store_unaligned (lanes); + + const auto remaining = static_cast (N) - offset; + + for (std::size_t i = 0; i < remaining; ++i) + ptr[offset + i] = lanes[i]; + } + + offset += nativeSize; + } + } + + std::array data; +}; + +using Float4 = SIMDRegister; +using Float8 = SIMDRegister; +using Double2 = SIMDRegister; +using Double4 = SIMDRegister; + +} // namespace yup diff --git a/modules/yup_simd/types/yup_Vec.h b/modules/yup_simd/types/yup_Vec.h new file mode 100644 index 000000000..6e6157fc8 --- /dev/null +++ b/modules/yup_simd/types/yup_Vec.h @@ -0,0 +1,137 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#pragma once + +namespace yup +{ + +//============================================================================== +struct alignas (8) Vec2f +{ + float x = 0.0f; + float y = 0.0f; + + constexpr Vec2f() noexcept = default; + + constexpr Vec2f (float xIn, float yIn) noexcept + : x (xIn) + , y (yIn) + { + } + + constexpr Vec2f operator+ (Vec2f other) const noexcept + { + return { x + other.x, y + other.y }; + } + + constexpr Vec2f operator* (float scalar) const noexcept + { + return { x * scalar, y * scalar }; + } + + constexpr float dot (Vec2f other) const noexcept + { + return x * other.x + y * other.y; + } + + float length() const noexcept + { + return std::sqrt (dot (*this)); + } + + Vec2f normalized() const noexcept + { + const auto len = length(); + return len > 0.0f ? *this * (1.0f / len) : Vec2f(); + } +}; + +//============================================================================== +struct alignas (16) Vec4f +{ + float r = 0.0f; + float g = 0.0f; + float b = 0.0f; + float a = 0.0f; + + constexpr Vec4f() noexcept = default; + + constexpr Vec4f (float rIn, float gIn, float bIn, float aIn) noexcept + : r (rIn) + , g (gIn) + , b (bIn) + , a (aIn) + { + } + + Vec4f operator+ (Vec4f other) const noexcept + { + alignas (16) float result[4]; + (Float4::loadUnaligned (data()) + Float4::loadUnaligned (other.data())).storeUnaligned (result); + return load (result); + } + + Vec4f operator* (float scalar) const noexcept + { + alignas (16) float result[4]; + (Float4::loadUnaligned (data()) * Float4::broadcast (scalar)).storeUnaligned (result); + return load (result); + } + + Vec4f operator* (Vec4f other) const noexcept + { + alignas (16) float result[4]; + (Float4::loadUnaligned (data()) * Float4::loadUnaligned (other.data())).storeUnaligned (result); + return load (result); + } + + Vec4f lerp (Vec4f other, float t) const noexcept + { + return *this + (other + (*this * -1.0f)) * t; + } + + constexpr Vec4f premultiplied() const noexcept + { + return { r * a, g * a, b * a, a }; + } + + static Vec4f load (const float* ptr) noexcept + { + return { ptr[0], ptr[1], ptr[2], ptr[3] }; + } + + void store (float* ptr) const noexcept + { + ptr[0] = r; + ptr[1] = g; + ptr[2] = b; + ptr[3] = a; + } + +private: + const float* data() const noexcept + { + return std::addressof (r); + } +}; + +} // namespace yup diff --git a/modules/yup_simd/yup_simd.cpp b/modules/yup_simd/yup_simd.cpp index ae49b2ef7..991fdc145 100644 --- a/modules/yup_simd/yup_simd.cpp +++ b/modules/yup_simd/yup_simd.cpp @@ -53,3 +53,6 @@ #endif #include "buffers/yup_FloatVectorOperations.cpp" +#include "buffers/yup_ComplexVectorOperations.cpp" +#include "buffers/yup_AffineTransformOperations.cpp" +#include "buffers/yup_ColorVectorOperations.cpp" diff --git a/modules/yup_simd/yup_simd.h b/modules/yup_simd/yup_simd.h index 9520b154e..0dd8b172d 100644 --- a/modules/yup_simd/yup_simd.h +++ b/modules/yup_simd/yup_simd.h @@ -50,7 +50,7 @@ website: https://github.com/kunitoki/yup license: ISC - dependencies: yup_core + dependencies: yup_core xsimd appleFrameworks: Accelerate END_YUP_MODULE_DECLARATION @@ -62,6 +62,7 @@ #define YUP_SIMD_H_INCLUDED #include +#include //============================================================================== #ifndef YUP_USE_SSE_INTRINSICS @@ -126,11 +127,30 @@ #endif #endif +//============================================================================== +#if YUP_MODULE_AVAILABLE_eigen +#include +#endif + //============================================================================== #include +#include +#include +#include #include //============================================================================== YUP_BEGIN_IGNORE_WARNINGS_MSVC (4661) + +#include "types/yup_SIMDRegister.h" +#include "types/yup_Vec.h" #include "buffers/yup_FloatVectorOperations.h" +#include "buffers/yup_ComplexVectorOperations.h" +#include "buffers/yup_AffineTransformOperations.h" +#include "buffers/yup_ColorVectorOperations.h" + +#if YUP_MODULE_AVAILABLE_eigen +#include "types/yup_EigenAdapters.h" +#endif + YUP_END_IGNORE_WARNINGS_MSVC diff --git a/tests/yup_graphics/yup_Image.cpp b/tests/yup_graphics/yup_Image.cpp index 958abce36..424c72506 100644 --- a/tests/yup_graphics/yup_Image.cpp +++ b/tests/yup_graphics/yup_Image.cpp @@ -74,6 +74,60 @@ TEST (ImageTests, GrayscaleBitmapStoresLuminanceAndReturnsOpaqueARGBColor) EXPECT_EQ (image.getPixel (0, 0), 0xff363636u); } +TEST (ImageTests, GrayscaleBitmapConvertsToOpaqueRGBATextureBytes) +{ + Image image (3, 1, PixelFormat::Grayscale); + + auto raw = image.getRawData(); + ASSERT_EQ (raw.size(), 3u); + raw[0] = 0; + raw[1] = 127; + raw[2] = 255; + + uint8 textureBytes[12] = {}; + ColorVectorOperations::convertGrayscaleToRGBA (raw.data(), textureBytes, 3); + + const uint8 expected[] = { 0, 0, 0, 255, 127, 127, 127, 255, 255, 255, 255, 255 }; + + for (size_t i = 0; i < std::size (expected); ++i) + EXPECT_EQ (textureBytes[i], expected[i]); +} + +TEST (ImageTests, RgbBitmapConvertsToOpaqueRGBATextureBytes) +{ + Image image (2, 1, PixelFormat::RGB); + + image.setPixel (0, 0, 0x80123456); + image.setPixel (1, 0, 0xffabcdef); + + const auto raw = image.getRawData(); + uint8 textureBytes[8] = {}; + ColorVectorOperations::convertRGBToRGBA (raw.data(), textureBytes, 2); + + const uint8 expected[] = { 0x12, 0x34, 0x56, 0xff, 0xab, 0xcd, 0xef, 0xff }; + + for (size_t i = 0; i < std::size (expected); ++i) + EXPECT_EQ (textureBytes[i], expected[i]); +} + +TEST (ImageTests, RgbaBitmapConvertsToPremultipliedRGBATextureBytes) +{ + Image image (2, 1, PixelFormat::RGBA); + + image.setPixel (0, 0, 0x80102040); + image.setPixel (1, 0, 0xff010203); + + const auto raw = image.getRawData(); + uint8 textureBytes[8] = {}; + std::memcpy (textureBytes, raw.data(), raw.size()); + ColorVectorOperations::premultiplyRGBA (textureBytes, 2); + + const uint8 expected[] = { 8, 16, 32, 128, 1, 2, 3, 255 }; + + for (size_t i = 0; i < std::size (expected); ++i) + EXPECT_EQ (textureBytes[i], expected[i]); +} + TEST (ImageTests, ColorCanConvertToExplicitPackedByteOrders) { const Color color (0x80123456); diff --git a/tests/yup_simd.cpp b/tests/yup_simd.cpp index cac23a400..16a7e4301 100644 --- a/tests/yup_simd.cpp +++ b/tests/yup_simd.cpp @@ -20,3 +20,7 @@ */ #include "yup_simd/yup_FloatVectorOperations.cpp" +#include "yup_simd/yup_ComplexVectorOperations.cpp" +#include "yup_simd/yup_AffineTransformOperations.cpp" +#include "yup_simd/yup_ColorVectorOperations.cpp" +#include "yup_simd/yup_SIMDRegister.cpp" diff --git a/tests/yup_simd/yup_AffineTransformOperations.cpp b/tests/yup_simd/yup_AffineTransformOperations.cpp new file mode 100644 index 000000000..8e234a98a --- /dev/null +++ b/tests/yup_simd/yup_AffineTransformOperations.cpp @@ -0,0 +1,50 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +using namespace yup; + +TEST (AffineTransformOpsTests, TransformPointsMatchesScalarReference) +{ + constexpr int numPoints = 6; + const float srcXs[numPoints] = { -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f }; + const float srcYs[numPoints] = { 3.0f, 2.0f, 1.0f, 0.0f, -1.0f, -2.0f }; + float dstXs[numPoints] = {}; + float dstYs[numPoints] = {}; + + constexpr float sx = 2.0f; + constexpr float shx = -0.5f; + constexpr float tx = 4.0f; + constexpr float shy = 0.25f; + constexpr float sy = -3.0f; + constexpr float ty = 1.0f; + + AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, shx, tx, shy, sy, ty); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_NEAR (dstXs[i], sx * srcXs[i] + shx * srcYs[i] + tx, 1.0e-5f); + EXPECT_NEAR (dstYs[i], shy * srcXs[i] + sy * srcYs[i] + ty, 1.0e-5f); + } +} diff --git a/tests/yup_simd/yup_ColorVectorOperations.cpp b/tests/yup_simd/yup_ColorVectorOperations.cpp new file mode 100644 index 000000000..26e5cc270 --- /dev/null +++ b/tests/yup_simd/yup_ColorVectorOperations.cpp @@ -0,0 +1,128 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +using namespace yup; + +TEST (ColorVectorOpsTests, PremultiplyARGBMatchesScalarReference) +{ + uint32_t pixels[] = { 0x80ff8040u, 0xff010203u, 0x00010203u }; + + ColorVectorOperations::premultiplyARGB (pixels, 3); + + EXPECT_EQ (pixels[0], 0x80804020u); + EXPECT_EQ (pixels[1], 0xff010203u); + EXPECT_EQ (pixels[2], 0x00000000u); +} + +TEST (ColorVectorOpsTests, PremultiplyRGBAMatchesScalarReference) +{ + uint8 pixels[] = { 255, 128, 64, 128, 1, 2, 3, 255, 1, 2, 3, 0 }; + + ColorVectorOperations::premultiplyRGBA (pixels, 3); + + EXPECT_EQ (pixels[0], 128); + EXPECT_EQ (pixels[1], 64); + EXPECT_EQ (pixels[2], 32); + EXPECT_EQ (pixels[3], 128); + EXPECT_EQ (pixels[4], 1); + EXPECT_EQ (pixels[5], 2); + EXPECT_EQ (pixels[6], 3); + EXPECT_EQ (pixels[7], 255); + EXPECT_EQ (pixels[8], 0); + EXPECT_EQ (pixels[9], 0); + EXPECT_EQ (pixels[10], 0); + EXPECT_EQ (pixels[11], 0); +} + +TEST (ColorVectorOpsTests, ConvertARGBtoRGBA) +{ + const uint32_t argb[] = { 0x12345678u, 0xaabbccddu }; + uint32_t rgba[2] = {}; + + ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 2); + + EXPECT_EQ (rgba[0], 0x34567812u); + EXPECT_EQ (rgba[1], 0xbbccddaau); +} + +TEST (ColorVectorOpsTests, ConvertARGBtoRGBAInPlace) +{ + uint32_t pixels[] = { 0x12345678u, 0xaabbccddu }; + + ColorVectorOperations::convertARGBtoRGBA (pixels, pixels, 2); + + EXPECT_EQ (pixels[0], 0x34567812u); + EXPECT_EQ (pixels[1], 0xbbccddaau); +} + +TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBA) +{ + const uint8 gray[] = { 0, 127, 255 }; + uint8 rgba[12] = {}; + + ColorVectorOperations::convertGrayscaleToRGBA (gray, rgba, 3); + + EXPECT_EQ (rgba[0], 0); + EXPECT_EQ (rgba[1], 0); + EXPECT_EQ (rgba[2], 0); + EXPECT_EQ (rgba[3], 255); + EXPECT_EQ (rgba[4], 127); + EXPECT_EQ (rgba[5], 127); + EXPECT_EQ (rgba[6], 127); + EXPECT_EQ (rgba[7], 255); + EXPECT_EQ (rgba[8], 255); + EXPECT_EQ (rgba[9], 255); + EXPECT_EQ (rgba[10], 255); + EXPECT_EQ (rgba[11], 255); +} + +TEST (ColorVectorOpsTests, ConvertRGBToRGBA) +{ + const uint8 rgb[] = { 1, 2, 3, 4, 5, 6 }; + uint8 rgba[8] = {}; + + ColorVectorOperations::convertRGBToRGBA (rgb, rgba, 2); + + EXPECT_EQ (rgba[0], 1); + EXPECT_EQ (rgba[1], 2); + EXPECT_EQ (rgba[2], 3); + EXPECT_EQ (rgba[3], 255); + EXPECT_EQ (rgba[4], 4); + EXPECT_EQ (rgba[5], 5); + EXPECT_EQ (rgba[6], 6); + EXPECT_EQ (rgba[7], 255); +} + +TEST (ColorVectorOpsTests, LerpRowsMatchesScalarReference) +{ + const float rowA[] = { 0.0f, 0.25f, 0.5f, 0.75f, 1.0f, 0.5f, 0.0f, 1.0f }; + const float rowB[] = { 1.0f, 0.75f, 0.5f, 0.25f, 0.0f, 0.25f, 0.5f, 0.75f }; + float dst[8] = {}; + + ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.25f, 2); + + for (int i = 0; i < 8; ++i) + EXPECT_NEAR (dst[i], rowA[i] + (rowB[i] - rowA[i]) * 0.25f, 1.0e-5f); +} diff --git a/tests/yup_simd/yup_ComplexVectorOperations.cpp b/tests/yup_simd/yup_ComplexVectorOperations.cpp new file mode 100644 index 000000000..f4c578729 --- /dev/null +++ b/tests/yup_simd/yup_ComplexVectorOperations.cpp @@ -0,0 +1,93 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +using namespace yup; + +class ComplexVectorOperationsTests : public ::testing::Test +{ +protected: + static void multiplyReference (float* dest, const float* a, const float* b, int complexPairs) + { + for (int i = 0; i < complexPairs; ++i) + { + const int realIndex = i * 2; + const int imagIndex = realIndex + 1; + + dest[realIndex] = a[realIndex] * b[realIndex] - a[imagIndex] * b[imagIndex]; + dest[imagIndex] = a[realIndex] * b[imagIndex] + a[imagIndex] * b[realIndex]; + } + } +}; + +TEST_F (ComplexVectorOperationsTests, MultiplyMatchesScalarReference) +{ + constexpr int complexPairs = 7; + const float a[complexPairs * 2] = { 1.0f, 2.0f, -3.0f, 4.0f, 5.0f, -6.0f, -7.0f, -8.0f, 0.5f, -0.25f, 2.5f, 3.5f, -4.0f, 1.0f }; + const float b[complexPairs * 2] = { -2.0f, 0.5f, 1.0f, -1.5f, -0.5f, -2.0f, 3.0f, 2.0f, -4.0f, 0.25f, 1.5f, -2.5f, 0.75f, 2.25f }; + + float expected[complexPairs * 2] = {}; + float actual[complexPairs * 2] = {}; + + multiplyReference (expected, a, b, complexPairs); + ComplexVectorOperations::multiply (actual, a, b, complexPairs); + + for (int i = 0; i < complexPairs * 2; ++i) + EXPECT_NEAR (actual[i], expected[i], 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateMatchesScalarReference) +{ + constexpr int complexPairs = 5; + const float a[complexPairs * 2] = { 1.0f, -1.0f, 2.0f, 3.0f, -4.0f, 0.5f, 0.25f, -0.75f, 3.0f, 2.0f }; + const float b[complexPairs * 2] = { 0.5f, 2.0f, -1.0f, 0.25f, 3.0f, -2.0f, 1.5f, 4.0f, -0.5f, 1.0f }; + float expected[complexPairs * 2] = { 0.25f, -0.5f, 0.75f, 1.0f, -1.25f, 1.5f, 1.75f, -2.0f, 2.25f, 2.5f }; + float actual[complexPairs * 2]; + + for (int i = 0; i < complexPairs * 2; ++i) + actual[i] = expected[i]; + + float product[complexPairs * 2] = {}; + multiplyReference (product, a, b, complexPairs); + + for (int i = 0; i < complexPairs * 2; ++i) + expected[i] += product[i]; + + ComplexVectorOperations::multiplyAccumulate (a, b, actual, complexPairs); + + for (int i = 0; i < complexPairs * 2; ++i) + EXPECT_NEAR (actual[i], expected[i], 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumMatchesScalarReference) +{ + constexpr int complexPairs = 6; + const float data[complexPairs * 2] = { 3.0f, 4.0f, 1.0f, -2.0f, -5.0f, 12.0f, 0.5f, 0.25f, -0.75f, 1.25f, 2.0f, -3.0f }; + float actual[complexPairs] = {}; + + ComplexVectorOperations::powerSpectrum (actual, data, complexPairs); + + for (int i = 0; i < complexPairs; ++i) + EXPECT_NEAR (actual[i], data[i * 2] * data[i * 2] + data[i * 2 + 1] * data[i * 2 + 1], 1.0e-5f); +} diff --git a/tests/yup_simd/yup_FloatVectorOperations.cpp b/tests/yup_simd/yup_FloatVectorOperations.cpp index cc0807fe8..748a81a8d 100644 --- a/tests/yup_simd/yup_FloatVectorOperations.cpp +++ b/tests/yup_simd/yup_FloatVectorOperations.cpp @@ -378,6 +378,55 @@ TEST_F (FloatVectorOperationsTests, ComputeCorrelationMatchesScalar) EXPECT_NEAR (correlation, reference, 1.0e-5); } +TEST_F (FloatVectorOperationsTests, ReductionsMatchScalarReferences) +{ + const float data1[] = { -1.0f, 2.0f, -3.0f, 4.0f, -5.0f }; + const float data2[] = { 0.5f, -1.0f, 1.5f, -2.0f, 2.5f }; + + float expectedSumSquares = 0.0f; + float expectedDotProduct = 0.0f; + float expectedMaxAbs = 0.0f; + + for (int i = 0; i < 5; ++i) + { + expectedSumSquares += data1[i] * data1[i]; + expectedDotProduct += data1[i] * data2[i]; + expectedMaxAbs = jmax (expectedMaxAbs, std::abs (data1[i])); + } + + EXPECT_NEAR (FloatVectorOperations::sumSquares (data1, 5), expectedSumSquares, 1.0e-5f); + EXPECT_NEAR (FloatVectorOperations::rms (data1, 5), std::sqrt (expectedSumSquares / 5.0f), 1.0e-5f); + EXPECT_NEAR (FloatVectorOperations::dotProduct (data1, data2, 5), expectedDotProduct, 1.0e-5f); + EXPECT_NEAR (FloatVectorOperations::maxAbs (data1, 5), expectedMaxAbs, 1.0e-5f); + + EXPECT_FLOAT_EQ (FloatVectorOperations::sumSquares (data1, 0), 0.0f); + EXPECT_FLOAT_EQ (FloatVectorOperations::rms (data1, 0), 0.0f); + EXPECT_FLOAT_EQ (FloatVectorOperations::dotProduct (data1, data2, 0), 0.0f); + EXPECT_FLOAT_EQ (FloatVectorOperations::maxAbs (data1, 0), 0.0f); +} + +TEST_F (FloatVectorOperationsTests, DoubleReductionsMatchScalarReferences) +{ + const double data1[] = { -0.25, 0.5, -0.75, 1.0 }; + const double data2[] = { 2.0, -3.0, 4.0, -5.0 }; + + double expectedSumSquares = 0.0; + double expectedDotProduct = 0.0; + double expectedMaxAbs = 0.0; + + for (int i = 0; i < 4; ++i) + { + expectedSumSquares += data1[i] * data1[i]; + expectedDotProduct += data1[i] * data2[i]; + expectedMaxAbs = jmax (expectedMaxAbs, std::abs (data1[i])); + } + + EXPECT_NEAR (FloatVectorOperations::sumSquares (data1, 4), expectedSumSquares, 1.0e-12); + EXPECT_NEAR (FloatVectorOperations::rms (data1, 4), std::sqrt (expectedSumSquares / 4.0), 1.0e-12); + EXPECT_NEAR (FloatVectorOperations::dotProduct (data1, data2, 4), expectedDotProduct, 1.0e-12); + EXPECT_NEAR (FloatVectorOperations::maxAbs (data1, 4), expectedMaxAbs, 1.0e-12); +} + TEST_F (FloatVectorOperationsTests, FloatToDoubleAndBack) { Random& random = Random::getSystemRandom(); diff --git a/tests/yup_simd/yup_SIMDRegister.cpp b/tests/yup_simd/yup_SIMDRegister.cpp new file mode 100644 index 000000000..3eb2b4d60 --- /dev/null +++ b/tests/yup_simd/yup_SIMDRegister.cpp @@ -0,0 +1,59 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +using namespace yup; + +TEST (SIMDRegisterTests, Float4ArithmeticAndHorizontalOps) +{ + const float aValues[4] = { 1.0f, -2.0f, 3.0f, -4.0f }; + const float bValues[4] = { 5.0f, 6.0f, -7.0f, -8.0f }; + + const auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + const auto result = a + b * Float4::broadcast (2.0f); + + float stored[4] = {}; + result.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], aValues[i] + bValues[i] * 2.0f); + + EXPECT_FLOAT_EQ (a.sum(), -2.0f); + EXPECT_FLOAT_EQ (a.abs().hmax(), 4.0f); +} + +TEST (SIMDRegisterTests, MulAddAndLoadStoreRoundTrip) +{ + alignas (16) const float base[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; + alignas (16) const float mul[4] = { -1.0f, 0.5f, 2.0f, -0.25f }; + alignas (16) const float add[4] = { 10.0f, 20.0f, 30.0f, 40.0f }; + alignas (16) float stored[4] = {}; + + const auto result = Float4::loadAligned (base).mulAdd (Float4::loadAligned (mul), Float4::loadAligned (add)); + result.storeAligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], base[i] + mul[i] * add[i]); +} diff --git a/thirdparty/xsimd/xsimd.h b/thirdparty/xsimd/xsimd.h new file mode 100644 index 000000000..cfb0d3c16 --- /dev/null +++ b/thirdparty/xsimd/xsimd.h @@ -0,0 +1,45 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +/* + ============================================================================== + + BEGIN_YUP_MODULE_DECLARATION + + ID: xsimd + vendor: xtensor-stack + version: 14.2.0 + name: xsimd C++ library for SIMD intrinsics + description: xsimd is a C++ library for SIMD intrinsics. + website: https://github.com/xtensor-stack/xsimd + upstream: https://github.com/xtensor-stack/xsimd/archive/refs/tags/14.2.0.tar.gz + license: BSD-3-Clause + + searchpaths: include + + END_YUP_MODULE_DECLARATION + + ============================================================================== +*/ + +#pragma once + +#include From 4e04a79aab8d1a60397d65d8c5f16bfa91d2bb4e Mon Sep 17 00:00:00 2001 From: kunitoki Date: Fri, 29 May 2026 10:56:19 +0200 Subject: [PATCH 2/5] More tweaks --- .../source/examples/SpectrumAnalyzer.h | 43 +++- .../yup_SpectrumAnalyzerComponent.cpp | 226 +++++++++++++++--- .../displays/yup_SpectrumAnalyzerComponent.h | 42 +++- .../yup_SpectrumAnalyzerComponent.cpp | 51 ++++ 4 files changed, 321 insertions(+), 41 deletions(-) diff --git a/examples/graphics/source/examples/SpectrumAnalyzer.h b/examples/graphics/source/examples/SpectrumAnalyzer.h index 0d534883c..56932c585 100644 --- a/examples/graphics/source/examples/SpectrumAnalyzer.h +++ b/examples/graphics/source/examples/SpectrumAnalyzer.h @@ -715,6 +715,19 @@ class SpectrumAnalyzerDemo }; addAndMakeVisible (*displayTypeCombo); + // Level mode selector + levelModeCombo = std::make_unique ("LevelMode"); + levelModeCombo->addItem ("Peak dBFS", 1); + levelModeCombo->addItem ("RMS dBFS", 2); + levelModeCombo->addItem ("Power dBFS", 3); + levelModeCombo->addItem ("PSD dBFS/Hz", 4); + levelModeCombo->setSelectedId (1); + levelModeCombo->onSelectedItemChanged = [this] + { + updateLevelMode(); + }; + addAndMakeVisible (*levelModeCombo); + // Release control releaseSlider = std::make_unique (yup::Slider::LinearHorizontal, "Release"); releaseSlider->setRange ({ 0.0, 5.0 }); @@ -815,7 +828,7 @@ class SpectrumAnalyzerDemo // Create parameter labels with proper font sizing auto labelFont = font.withHeight (12.0f); - for (const auto& labelText : { "Signal Type:", "Frequency:", "Amplitude:", "Sweep Duration:", "FFT Size:", "Window:", "Display:", "View Mode:", "Color Map:", "Release:", "Overlap:", "Smoothing:" }) + for (const auto& labelText : { "Signal Type:", "Frequency:", "Amplitude:", "Sweep Duration:", "FFT Size:", "Window:", "Display:", "View Mode:", "Color Map:", "Release:", "Overlap:", "Smoothing:", "Level Mode:" }) { auto label = parameterLabels.add (std::make_unique (labelText)); label->setText (labelText); @@ -898,6 +911,7 @@ class SpectrumAnalyzerDemo auto row3 = bounds.removeFromTop (rowHeight); auto releaseSection = row3.removeFromLeft (colWidth); auto overlapSection = row3.removeFromLeft (colWidth); + auto levelModeSection = row3.removeFromLeft (colWidth); parameterLabels[9]->setBounds (releaseSection.removeFromTop (labelHeight)); releaseSlider->setBounds (releaseSection.removeFromTop (controlHeight)); @@ -905,6 +919,9 @@ class SpectrumAnalyzerDemo parameterLabels[10]->setBounds (overlapSection.removeFromTop (labelHeight)); overlapSlider->setBounds (overlapSection.removeFromTop (controlHeight)); + parameterLabels[12]->setBounds (levelModeSection.removeFromTop (labelHeight)); + levelModeCombo->setBounds (levelModeSection.removeFromTop (controlHeight)); + // Fourth row: Status labels auto row4 = bounds.removeFromTop (30); auto freqStatus = row4.removeFromLeft (bounds.getWidth() / 3); @@ -1038,6 +1055,29 @@ class SpectrumAnalyzerDemo analyzerComponent.setDisplayType (displayType); } + void updateLevelMode() + { + auto levelMode = yup::SpectrumAnalyzerComponent::LevelMode::peakDecibels; + + switch (levelModeCombo->getSelectedId()) + { + case 1: + levelMode = yup::SpectrumAnalyzerComponent::LevelMode::peakDecibels; + break; + case 2: + levelMode = yup::SpectrumAnalyzerComponent::LevelMode::rmsDecibels; + break; + case 3: + levelMode = yup::SpectrumAnalyzerComponent::LevelMode::powerDecibels; + break; + case 4: + levelMode = yup::SpectrumAnalyzerComponent::LevelMode::powerSpectralDensity; + break; + } + + analyzerComponent.setLevelMode (levelMode); + } + void updateViewMode() { switch (viewModeCombo->getSelectedId()) @@ -1114,6 +1154,7 @@ class SpectrumAnalyzerDemo std::unique_ptr fftSizeCombo; std::unique_ptr windowTypeCombo; std::unique_ptr displayTypeCombo; + std::unique_ptr levelModeCombo; std::unique_ptr releaseSlider; std::unique_ptr overlapSlider; std::unique_ptr smoothingSlider; diff --git a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp index 8f2dc3d33..abcfbdbaa 100644 --- a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp +++ b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.cpp @@ -101,14 +101,14 @@ void SpectrumAnalyzerComponent::processFFT() // Perform FFT fftProcessor->performRealFFTForward (fftInputBuffer.data(), fftOutputBuffer.data()); - // Pre-compute magnitudes with window gain compensation + // Pre-compute raw FFT magnitudes. Calibration is applied when bins are mapped to display levels. const int numBins = fftSize / 2 + 1; for (int binIndex = 0; binIndex < numBins; ++binIndex) { const float real = fftOutputBuffer[static_cast (binIndex * 2)]; const float imag = fftOutputBuffer[static_cast (binIndex * 2 + 1)]; - const float magnitude = std::sqrt (real * real + imag * imag) * windowGain; + const float magnitude = std::sqrt (real * real + imag * imag); magnitudeBuffer[static_cast (binIndex)] = magnitude; } @@ -117,8 +117,6 @@ void SpectrumAnalyzerComponent::processFFT() void SpectrumAnalyzerComponent::updateDisplay (bool hasNewFFTData) { // Always apply consistent smoothing to prevent pulsating - const int numBins = fftSize / 2 + 1; - // Process display bins for (int i = 0; i < scopeSize; ++i) { @@ -162,36 +160,8 @@ void SpectrumAnalyzerComponent::updateDisplay (bool hasNewFFTData) const float endBin = (freqRangeEnd * float (fftSize)) / float (sampleRate); const float binSpan = endBin - startBin; - float magnitude = 0.0f; - - if (binSpan <= 1.5f) - { - // Low frequencies: Use interpolation for smooth transitions - const float exactBin = (centerFreq * float (fftSize)) / float (sampleRate); - const int bin1 = jlimit (0, numBins - 1, static_cast (exactBin)); - const int bin2 = jlimit (0, numBins - 1, bin1 + 1); - const float fraction = exactBin - float (bin1); - - const float mag1 = magnitudeBuffer[static_cast (bin1)]; - const float mag2 = magnitudeBuffer[static_cast (bin2)]; - - // Linear interpolation for smooth low-frequency response - magnitude = mag1 + fraction * (mag2 - mag1); - } - else - { - // High frequencies: Aggregate multiple bins using peak-hold - const int binStart = jlimit (0, numBins - 1, static_cast (startBin)); - const int binEnd = jlimit (0, numBins - 1, static_cast (endBin + 0.5f)); - - for (int binIndex = binStart; binIndex <= binEnd; ++binIndex) - magnitude = jmax (magnitude, magnitudeBuffer[static_cast (binIndex)]); - } - - // Convert to decibels with proper calibration - const float magnitudeDb = magnitude > 0.0f - ? 20.0f * std::log10 (magnitude / float (fftSize)) - : minDecibels; + const float exactBin = (centerFreq * float (fftSize)) / float (sampleRate); + const float magnitudeDb = getDisplayDecibelsForBinRange (startBin, endBin, exactBin); // Map to display range [0.0, 1.0] targetLevel = jmap (jlimit (minDecibels, maxDecibels, magnitudeDb), minDecibels, maxDecibels, 0.0f, 1.0f); @@ -243,13 +213,175 @@ void SpectrumAnalyzerComponent::generateWindow() { WindowFunctions::generate (currentWindowType, windowBuffer.data(), windowBuffer.size()); - // Calculate window gain compensation float windowSum = 0.0f; + float windowPowerSum = 0.0f; for (int i = 0; i < fftSize; ++i) - windowSum += windowBuffer[static_cast (i)]; + { + const float windowValue = windowBuffer[static_cast (i)]; + windowSum += windowValue; + windowPowerSum += windowValue * windowValue; + } + + windowCoherentGain = windowSum > 0.0f ? windowSum / float (fftSize) : 1.0f; + + equivalentNoiseBandwidthBins = (windowSum > 0.0f && windowPowerSum > 0.0f) + ? (float (fftSize) * windowPowerSum) / (windowSum * windowSum) + : 1.0f; +} + +float SpectrumAnalyzerComponent::getBinPeakAmplitude (int binIndex) const noexcept +{ + const int lastBin = fftSize / 2; + + if (! isPositiveAndBelow (binIndex, lastBin + 1)) + return 0.0f; + + const float coherentGain = windowCoherentGain > 0.0f ? windowCoherentGain : 1.0f; + const float oneSidedScale = (binIndex > 0 && binIndex < lastBin) ? 2.0f : 1.0f; + const float rawMagnitude = magnitudeBuffer[static_cast (binIndex)]; + + return (oneSidedScale * rawMagnitude) / (coherentGain * float (fftSize)); +} + +float SpectrumAnalyzerComponent::getBinRMSAmplitude (int binIndex) const noexcept +{ + const int lastBin = fftSize / 2; + const float peakAmplitude = getBinPeakAmplitude (binIndex); + + if (binIndex <= 0 || binIndex >= lastBin) + return peakAmplitude; + + return peakAmplitude * 0.7071067811865475f; +} + +float SpectrumAnalyzerComponent::getBinPower (int binIndex) const noexcept +{ + const float rmsAmplitude = getBinRMSAmplitude (binIndex); + return rmsAmplitude * rmsAmplitude; +} + +float SpectrumAnalyzerComponent::getBinPowerSpectralDensity (int binIndex) const noexcept +{ + const float enbwHz = getEquivalentNoiseBandwidthHz(); + return enbwHz > 0.0f ? getBinPower (binIndex) / enbwHz : 0.0f; +} + +float SpectrumAnalyzerComponent::getBinLinearLevel (int binIndex) const noexcept +{ + switch (levelMode) + { + case LevelMode::peakDecibels: + return getBinPeakAmplitude (binIndex); + + case LevelMode::rmsDecibels: + return getBinRMSAmplitude (binIndex); + + case LevelMode::powerDecibels: + return getBinPower (binIndex); + + case LevelMode::powerSpectralDensity: + return getBinPowerSpectralDensity (binIndex); + } + + return getBinPeakAmplitude (binIndex); +} + +float SpectrumAnalyzerComponent::linearLevelToDecibels (float level) const noexcept +{ + if (level <= 0.0f) + return minDecibels; + + return (isPowerMode() ? 10.0f : 20.0f) * std::log10 (level); +} + +float SpectrumAnalyzerComponent::getInterpolatedPeakDecibels (float exactBin) const noexcept +{ + const int numBins = fftSize / 2 + 1; + const int lastBin = numBins - 1; + const int nearestBin = jlimit (0, lastBin, roundToInt (exactBin)); + + int peakBin = nearestBin; + float peakLevel = getBinLinearLevel (nearestBin); + + const int searchStart = jmax (0, nearestBin - 1); + const int searchEnd = jmin (lastBin, nearestBin + 1); + + for (int binIndex = searchStart; binIndex <= searchEnd; ++binIndex) + { + const float binLevel = getBinLinearLevel (binIndex); + + if (binLevel > peakLevel) + { + peakLevel = binLevel; + peakBin = binIndex; + } + } + + const float peakDecibels = linearLevelToDecibels (peakLevel); + + if (peakBin <= 0 || peakBin >= lastBin) + return peakDecibels; + + const float y0 = linearLevelToDecibels (getBinLinearLevel (peakBin - 1)); + const float y1 = peakDecibels; + const float y2 = linearLevelToDecibels (getBinLinearLevel (peakBin + 1)); + const float denominator = y0 - 2.0f * y1 + y2; + + if (std::abs (denominator) < 1.0e-6f || denominator >= 0.0f) + return y1; - // Gain compensation factor to restore energy after windowing - windowGain = windowSum > 0.0f ? float (fftSize) / windowSum : 1.0f; + const float offset = jlimit (-1.0f, 1.0f, 0.5f * (y0 - y2) / denominator); + return y1 - 0.25f * (y0 - y2) * offset; +} + +float SpectrumAnalyzerComponent::getDisplayDecibelsForBinRange (float startBin, float endBin, float centerBin) const noexcept +{ + const int numBins = fftSize / 2 + 1; + const int lastBin = numBins - 1; + const float binSpan = endBin - startBin; + + if (binSpan <= 1.5f && ! isPowerMode()) + return getInterpolatedPeakDecibels (centerBin); + + const int binStart = jlimit (0, lastBin, static_cast (std::floor (startBin))); + const int binEnd = jlimit (0, lastBin, static_cast (std::ceil (endBin))); + + if (levelMode == LevelMode::powerDecibels) + { + float bandPower = 0.0f; + + for (int binIndex = binStart; binIndex <= binEnd; ++binIndex) + bandPower += getBinPower (binIndex); + + return linearLevelToDecibels (bandPower); + } + + if (levelMode == LevelMode::powerSpectralDensity) + { + float densitySum = 0.0f; + int densityCount = 0; + + for (int binIndex = binStart; binIndex <= binEnd; ++binIndex) + { + densitySum += getBinPowerSpectralDensity (binIndex); + ++densityCount; + } + + return linearLevelToDecibels (densityCount > 0 ? densitySum / float (densityCount) : 0.0f); + } + + float peakLevel = 0.0f; + + for (int binIndex = binStart; binIndex <= binEnd; ++binIndex) + peakLevel = jmax (peakLevel, getBinLinearLevel (binIndex)); + + return linearLevelToDecibels (peakLevel); +} + +bool SpectrumAnalyzerComponent::isPowerMode() const noexcept +{ + return levelMode == LevelMode::powerDecibels + || levelMode == LevelMode::powerSpectralDensity; } //============================================================================== @@ -485,8 +617,10 @@ void SpectrumAnalyzerComponent::setWindowType (WindowType type) if (currentWindowType != type) { currentWindowType = type; + generateWindow(); + needsWindowUpdate = false; - needsWindowUpdate = true; + repaint(); } } @@ -551,6 +685,20 @@ void SpectrumAnalyzerComponent::setDisplayType (DisplayType type) } } +void SpectrumAnalyzerComponent::setLevelMode (LevelMode mode) +{ + if (levelMode != mode) + { + levelMode = mode; + repaint(); + } +} + +float SpectrumAnalyzerComponent::getEquivalentNoiseBandwidthHz() const noexcept +{ + return equivalentNoiseBandwidthBins * (float (sampleRate) / float (fftSize)); +} + //============================================================================== float SpectrumAnalyzerComponent::getFrequencyForBin (int binIndex) const noexcept { diff --git a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h index f2a34e88f..da5e94925 100644 --- a/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h +++ b/modules/yup_audio_gui/displays/yup_SpectrumAnalyzerComponent.h @@ -64,6 +64,15 @@ class YUP_API SpectrumAnalyzerComponent filled ///< Draw spectrum as smooth filled area }; + /** Level calculation mode for spectrum magnitudes. */ + enum class LevelMode + { + peakDecibels, ///< Peak amplitude in dBFS, where a full-scale sine peak is 0 dBFS + rmsDecibels, ///< RMS amplitude in dBFS, where a full-scale sine is approximately -3.01 dBFS + powerDecibels, ///< RMS power in dBFS, summed across each displayed frequency band + powerSpectralDensity ///< RMS power spectral density in dBFS/Hz, corrected for window ENBW + }; + //============================================================================== /** Display constants */ enum @@ -159,6 +168,25 @@ class YUP_API SpectrumAnalyzerComponent /** Returns the current display type. */ DisplayType getDisplayType() const noexcept { return displayType; } + //============================================================================== + /** Sets the level calculation mode. + + @param mode the level mode to use when converting FFT bins to decibels + */ + void setLevelMode (LevelMode mode); + + /** Returns the current level calculation mode. */ + LevelMode getLevelMode() const noexcept { return levelMode; } + + /** Returns the coherent gain of the current window. */ + float getWindowCoherentGain() const noexcept { return windowCoherentGain; } + + /** Returns the equivalent noise bandwidth of the current window in FFT bins. */ + float getEquivalentNoiseBandwidthBins() const noexcept { return equivalentNoiseBandwidthBins; } + + /** Returns the equivalent noise bandwidth of the current window in Hz. */ + float getEquivalentNoiseBandwidthHz() const noexcept; + //============================================================================== /** Sets the release time for spectrum falloff. @@ -214,6 +242,16 @@ class YUP_API SpectrumAnalyzerComponent void drawFrequencyGrid (Graphics& g, const Rectangle& bounds); void drawDecibelGrid (Graphics& g, const Rectangle& bounds); + float getBinPeakAmplitude (int binIndex) const noexcept; + float getBinRMSAmplitude (int binIndex) const noexcept; + float getBinPower (int binIndex) const noexcept; + float getBinPowerSpectralDensity (int binIndex) const noexcept; + float getBinLinearLevel (int binIndex) const noexcept; + float linearLevelToDecibels (float level) const noexcept; + float getInterpolatedPeakDecibels (float exactBin) const noexcept; + float getDisplayDecibelsForBinRange (float startBin, float endBin, float centerBin) const noexcept; + bool isPowerMode() const noexcept; + float frequencyToX (float frequency, const Rectangle& bounds) const noexcept; float decibelToY (float decibel, const Rectangle& bounds) const noexcept; float binToY (int binIndex, float height) const noexcept; @@ -235,6 +273,7 @@ class YUP_API SpectrumAnalyzerComponent // Configuration WindowType currentWindowType = WindowType::hann; DisplayType displayType = DisplayType::filled; + LevelMode levelMode = LevelMode::peakDecibels; int fftSize = 4096; float minFrequency = 20.0f; float maxFrequency = 20000.0f; @@ -246,7 +285,8 @@ class YUP_API SpectrumAnalyzerComponent float releaseTimeSeconds = 1.0f; // Window compensation - float windowGain = 1.0f; + float windowCoherentGain = 1.0f; + float equivalentNoiseBandwidthBins = 1.0f; bool needsWindowUpdate = true; YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (SpectrumAnalyzerComponent) diff --git a/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp b/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp index f057952ee..7bf80d369 100644 --- a/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp +++ b/tests/yup_audio_gui/yup_SpectrumAnalyzerComponent.cpp @@ -163,6 +163,57 @@ TEST_F (SpectrumAnalyzerComponentTests, ToggleWindowTypes) EXPECT_EQ (WindowType::hann, analyzer->getWindowType()); } +TEST_F (SpectrumAnalyzerComponentTests, RectangularWindowHasUnityCalibration) +{ + analyzer->setWindowType (WindowType::rectangular); + + EXPECT_FLOAT_EQ (1.0f, analyzer->getWindowCoherentGain()); + EXPECT_FLOAT_EQ (1.0f, analyzer->getEquivalentNoiseBandwidthBins()); +} + +TEST_F (SpectrumAnalyzerComponentTests, HannWindowReportsExpectedNoiseBandwidth) +{ + analyzer->setWindowType (WindowType::hann); + + EXPECT_NEAR (0.5f, analyzer->getWindowCoherentGain(), 0.001f); + EXPECT_NEAR (1.5f, analyzer->getEquivalentNoiseBandwidthBins(), 0.01f); +} + +TEST_F (SpectrumAnalyzerComponentTests, EquivalentNoiseBandwidthHzFollowsSampleRate) +{ + analyzer->setWindowType (WindowType::rectangular); + analyzer->setSampleRate (48000.0); + + EXPECT_NEAR (48000.0f / 2048.0f, analyzer->getEquivalentNoiseBandwidthHz(), 0.001f); +} + +//============================================================================== +// Level Mode Tests +//============================================================================== + +TEST_F (SpectrumAnalyzerComponentTests, DefaultLevelModeIsPeakDecibels) +{ + EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::peakDecibels, analyzer->getLevelMode()); +} + +TEST_F (SpectrumAnalyzerComponentTests, SetLevelModeToRMSDecibels) +{ + analyzer->setLevelMode (SpectrumAnalyzerComponent::LevelMode::rmsDecibels); + EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::rmsDecibels, analyzer->getLevelMode()); +} + +TEST_F (SpectrumAnalyzerComponentTests, SetLevelModeToPowerDecibels) +{ + analyzer->setLevelMode (SpectrumAnalyzerComponent::LevelMode::powerDecibels); + EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::powerDecibels, analyzer->getLevelMode()); +} + +TEST_F (SpectrumAnalyzerComponentTests, SetLevelModeToPowerSpectralDensity) +{ + analyzer->setLevelMode (SpectrumAnalyzerComponent::LevelMode::powerSpectralDensity); + EXPECT_EQ (SpectrumAnalyzerComponent::LevelMode::powerSpectralDensity, analyzer->getLevelMode()); +} + //============================================================================== // Update Rate Tests //============================================================================== From 271eb3d27ab4d4100ee0974b2a854191c3682ad6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 29 May 2026 22:09:23 +0000 Subject: [PATCH 3/5] Add comprehensive yup_simd unit tests for all classes --- tests/CMakeLists.txt | 1 + tests/yup_simd.cpp | 1 + .../yup_AffineTransformOperations.cpp | 178 +++++++- tests/yup_simd/yup_ColorVectorOperations.cpp | 229 ++++++++++ .../yup_simd/yup_ComplexVectorOperations.cpp | 198 +++++++++ tests/yup_simd/yup_SIMDRegister.cpp | 391 ++++++++++++++++++ tests/yup_simd/yup_Vec.cpp | 358 ++++++++++++++++ 7 files changed, 1355 insertions(+), 1 deletion(-) create mode 100644 tests/yup_simd/yup_Vec.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2faad6c3b..160886428 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -55,6 +55,7 @@ set (target_gtest_modules "") set (target_preloaded "") set (target_modules yup_core + yup_simd yup_audio_basics yup_audio_devices yup_audio_formats diff --git a/tests/yup_simd.cpp b/tests/yup_simd.cpp index 16a7e4301..aa74f453e 100644 --- a/tests/yup_simd.cpp +++ b/tests/yup_simd.cpp @@ -24,3 +24,4 @@ #include "yup_simd/yup_AffineTransformOperations.cpp" #include "yup_simd/yup_ColorVectorOperations.cpp" #include "yup_simd/yup_SIMDRegister.cpp" +#include "yup_simd/yup_Vec.cpp" diff --git a/tests/yup_simd/yup_AffineTransformOperations.cpp b/tests/yup_simd/yup_AffineTransformOperations.cpp index 8e234a98a..1a22a5f3f 100644 --- a/tests/yup_simd/yup_AffineTransformOperations.cpp +++ b/tests/yup_simd/yup_AffineTransformOperations.cpp @@ -25,7 +25,24 @@ using namespace yup; -TEST (AffineTransformOpsTests, TransformPointsMatchesScalarReference) +class AffineTransformOpsTests : public ::testing::Test +{ +protected: + static void transformScalar (const float* srcXs, const float* srcYs, + float* dstXs, float* dstYs, int numPoints, + float sx, float shx, float tx, float shy, float sy, float ty) noexcept + { + for (int i = 0; i < numPoints; ++i) + { + const float x = srcXs[i]; + const float y = srcYs[i]; + dstXs[i] = sx * x + shx * y + tx; + dstYs[i] = shy * x + sy * y + ty; + } + } +}; + +TEST_F (AffineTransformOpsTests, TransformPointsMatchesScalarReference) { constexpr int numPoints = 6; const float srcXs[numPoints] = { -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f }; @@ -48,3 +65,162 @@ TEST (AffineTransformOpsTests, TransformPointsMatchesScalarReference) EXPECT_NEAR (dstYs[i], shy * srcXs[i] + sy * srcYs[i] + ty, 1.0e-5f); } } + +TEST_F (AffineTransformOpsTests, TransformPointsInPlace) +{ + constexpr int numPoints = 5; + float xs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + float ys[numPoints] = { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f }; + + const float origXs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + const float origYs[numPoints] = { 5.0f, 4.0f, 3.0f, 2.0f, 1.0f }; + + constexpr float sx = 2.0f, shx = 0.5f, tx = 1.0f; + constexpr float shy = -0.5f, sy = 3.0f, ty = -1.0f; + + AffineTransformOperations::transformPoints (xs, ys, numPoints, sx, shx, tx, shy, sy, ty); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_NEAR (xs[i], sx * origXs[i] + shx * origYs[i] + tx, 1.0e-5f); + EXPECT_NEAR (ys[i], shy * origXs[i] + sy * origYs[i] + ty, 1.0e-5f); + } +} + +TEST_F (AffineTransformOpsTests, IdentityTransformLeavesPointsUnchanged) +{ + constexpr int numPoints = 8; + const float srcXs[numPoints] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; + const float srcYs[numPoints] = { 8.0f, -7.0f, 6.0f, -5.0f, 4.0f, -3.0f, 2.0f, -1.0f }; + float dstXs[numPoints] = {}; + float dstYs[numPoints] = {}; + + // Identity: sx=1, shx=0, tx=0, shy=0, sy=1, ty=0 + AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_FLOAT_EQ (dstXs[i], srcXs[i]); + EXPECT_FLOAT_EQ (dstYs[i], srcYs[i]); + } +} + +TEST_F (AffineTransformOpsTests, PureTranslation) +{ + constexpr int numPoints = 6; + const float srcXs[numPoints] = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + const float srcYs[numPoints] = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + float dstXs[numPoints] = {}; + float dstYs[numPoints] = {}; + + constexpr float tx = 100.0f, ty = 200.0f; + + // Pure translation: sx=1, shx=0, shy=0, sy=1 + AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, 1.0f, 0.0f, tx, 0.0f, 1.0f, ty); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_FLOAT_EQ (dstXs[i], srcXs[i] + tx); + EXPECT_FLOAT_EQ (dstYs[i], srcYs[i] + ty); + } +} + +TEST_F (AffineTransformOpsTests, PureScale) +{ + constexpr int numPoints = 8; + const float srcXs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + const float srcYs[numPoints] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + float dstXs[numPoints] = {}; + float dstYs[numPoints] = {}; + + constexpr float sx = 2.0f, sy = -0.5f; + + AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, 0.0f, 0.0f, 0.0f, sy, 0.0f); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_FLOAT_EQ (dstXs[i], sx * srcXs[i]); + EXPECT_FLOAT_EQ (dstYs[i], sy * srcYs[i]); + } +} + +TEST_F (AffineTransformOpsTests, SinglePointTransform) +{ + const float srcX = 3.0f; + const float srcY = 4.0f; + float dstX = 0.0f; + float dstY = 0.0f; + + constexpr float sx = 2.0f, shx = 1.0f, tx = -1.0f; + constexpr float shy = 0.5f, sy = -1.0f, ty = 3.0f; + + AffineTransformOperations::transformPoints (&srcX, &srcY, &dstX, &dstY, 1, sx, shx, tx, shy, sy, ty); + + EXPECT_NEAR (dstX, sx * srcX + shx * srcY + tx, 1.0e-5f); + EXPECT_NEAR (dstY, shy * srcX + sy * srcY + ty, 1.0e-5f); +} + +TEST_F (AffineTransformOpsTests, ZeroPointsDoesNothing) +{ + float dstXs[4] = { 99.0f, 99.0f, 99.0f, 99.0f }; + float dstYs[4] = { 99.0f, 99.0f, 99.0f, 99.0f }; + + // numPoints=0 should not write to output + AffineTransformOperations::transformPoints (nullptr, nullptr, dstXs, dstYs, 0, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f); + + for (int i = 0; i < 4; ++i) + { + EXPECT_FLOAT_EQ (dstXs[i], 99.0f); + EXPECT_FLOAT_EQ (dstYs[i], 99.0f); + } +} + +TEST_F (AffineTransformOpsTests, LargeBufferMatchesScalarReference) +{ + constexpr int numPoints = 100; + float srcXs[numPoints], srcYs[numPoints]; + float dstXs[numPoints], dstYs[numPoints]; + float refXs[numPoints], refYs[numPoints]; + + for (int i = 0; i < numPoints; ++i) + { + srcXs[i] = (float) (i - 50); + srcYs[i] = (float) (i * 2 - 100); + } + + constexpr float sx = 1.5f, shx = 0.3f, tx = 7.0f; + constexpr float shy = -0.2f, sy = 2.0f, ty = -3.0f; + + AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, shx, tx, shy, sy, ty); + transformScalar (srcXs, srcYs, refXs, refYs, numPoints, sx, shx, tx, shy, sy, ty); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_NEAR (dstXs[i], refXs[i], 1.0e-4f); + EXPECT_NEAR (dstYs[i], refYs[i], 1.0e-4f); + } +} + +TEST_F (AffineTransformOpsTests, ThreePointsUsesScalarTail) +{ + // 3 points: the SIMD path handles 4 at a time, so all 3 go through scalar tail + constexpr int numPoints = 3; + const float srcXs[numPoints] = { 1.0f, 2.0f, 3.0f }; + const float srcYs[numPoints] = { 4.0f, 5.0f, 6.0f }; + float dstXs[numPoints] = {}; + float dstYs[numPoints] = {}; + float refXs[numPoints] = {}; + float refYs[numPoints] = {}; + + constexpr float sx = 2.0f, shx = 0.5f, tx = 1.0f; + constexpr float shy = -0.5f, sy = 3.0f, ty = -1.0f; + + AffineTransformOperations::transformPoints (srcXs, srcYs, dstXs, dstYs, numPoints, sx, shx, tx, shy, sy, ty); + transformScalar (srcXs, srcYs, refXs, refYs, numPoints, sx, shx, tx, shy, sy, ty); + + for (int i = 0; i < numPoints; ++i) + { + EXPECT_NEAR (dstXs[i], refXs[i], 1.0e-5f); + EXPECT_NEAR (dstYs[i], refYs[i], 1.0e-5f); + } +} diff --git a/tests/yup_simd/yup_ColorVectorOperations.cpp b/tests/yup_simd/yup_ColorVectorOperations.cpp index 26e5cc270..8e0a24280 100644 --- a/tests/yup_simd/yup_ColorVectorOperations.cpp +++ b/tests/yup_simd/yup_ColorVectorOperations.cpp @@ -25,6 +25,10 @@ using namespace yup; +// ============================================================================== +// premultiplyARGB tests +// ============================================================================== + TEST (ColorVectorOpsTests, PremultiplyARGBMatchesScalarReference) { uint32_t pixels[] = { 0x80ff8040u, 0xff010203u, 0x00010203u }; @@ -36,6 +40,53 @@ TEST (ColorVectorOpsTests, PremultiplyARGBMatchesScalarReference) EXPECT_EQ (pixels[2], 0x00000000u); } +TEST (ColorVectorOpsTests, PremultiplyARGBFullAlphaUnchanged) +{ + // alpha=0xff: all channels should be unchanged + uint32_t pixels[] = { 0xff112233u }; + ColorVectorOperations::premultiplyARGB (pixels, 1); + EXPECT_EQ (pixels[0], 0xff112233u); +} + +TEST (ColorVectorOpsTests, PremultiplyARGBZeroAlphaBlacksOut) +{ + // alpha=0x00: all channels should become 0 + uint32_t pixels[] = { 0x00aabbccu }; + ColorVectorOperations::premultiplyARGB (pixels, 1); + EXPECT_EQ (pixels[0], 0x00000000u); +} + +TEST (ColorVectorOpsTests, PremultiplyARGBHalfAlpha) +{ + // alpha=0x80 (128): channels should be approximately halved + // premultiply: (c * 128 + 127) / 255 + uint32_t pixels[] = { 0x80ffu << 16 | 0x80u << 8 | 0x80u }; + pixels[0] = (0x80u << 24) | (0xffu << 16) | (0x80u << 8) | 0x80u; + const uint32_t alpha = 0x80u; + const uint32_t expectedRed = (0xffu * alpha + 127u) / 255u; + const uint32_t expectedGreen = (0x80u * alpha + 127u) / 255u; + const uint32_t expectedBlue = (0x80u * alpha + 127u) / 255u; + + ColorVectorOperations::premultiplyARGB (pixels, 1); + + EXPECT_EQ ((pixels[0] >> 24) & 0xffu, alpha); + EXPECT_EQ ((pixels[0] >> 16) & 0xffu, expectedRed); + EXPECT_EQ ((pixels[0] >> 8) & 0xffu, expectedGreen); + EXPECT_EQ (pixels[0] & 0xffu, expectedBlue); +} + +TEST (ColorVectorOpsTests, PremultiplyARGBZeroPixels) +{ + // Should not write to output buffer + uint32_t pixels[] = { 0xdeadbeefu }; + ColorVectorOperations::premultiplyARGB (pixels, 0); + EXPECT_EQ (pixels[0], 0xdeadbeefu); +} + +// ============================================================================== +// premultiplyRGBA tests +// ============================================================================== + TEST (ColorVectorOpsTests, PremultiplyRGBAMatchesScalarReference) { uint8 pixels[] = { 255, 128, 64, 128, 1, 2, 3, 255, 1, 2, 3, 0 }; @@ -56,6 +107,37 @@ TEST (ColorVectorOpsTests, PremultiplyRGBAMatchesScalarReference) EXPECT_EQ (pixels[11], 0); } +TEST (ColorVectorOpsTests, PremultiplyRGBAFullAlphaUnchanged) +{ + uint8 pixels[] = { 100, 150, 200, 255 }; + ColorVectorOperations::premultiplyRGBA (pixels, 1); + EXPECT_EQ (pixels[0], 100); + EXPECT_EQ (pixels[1], 150); + EXPECT_EQ (pixels[2], 200); + EXPECT_EQ (pixels[3], 255); +} + +TEST (ColorVectorOpsTests, PremultiplyRGBAZeroAlphaBlacksOut) +{ + uint8 pixels[] = { 100, 150, 200, 0 }; + ColorVectorOperations::premultiplyRGBA (pixels, 1); + EXPECT_EQ (pixels[0], 0); + EXPECT_EQ (pixels[1], 0); + EXPECT_EQ (pixels[2], 0); + EXPECT_EQ (pixels[3], 0); +} + +TEST (ColorVectorOpsTests, PremultiplyRGBAZeroPixels) +{ + uint8 pixels[] = { 99, 98, 97, 96 }; + ColorVectorOperations::premultiplyRGBA (pixels, 0); + EXPECT_EQ (pixels[0], 99); +} + +// ============================================================================== +// convertARGBtoRGBA tests +// ============================================================================== + TEST (ColorVectorOpsTests, ConvertARGBtoRGBA) { const uint32_t argb[] = { 0x12345678u, 0xaabbccddu }; @@ -77,6 +159,43 @@ TEST (ColorVectorOpsTests, ConvertARGBtoRGBAInPlace) EXPECT_EQ (pixels[1], 0xbbccddaau); } +TEST (ColorVectorOpsTests, ConvertARGBtoRGBAZeroPixels) +{ + const uint32_t argb[] = { 0x12345678u }; + uint32_t rgba[] = { 0xdeadbeefu }; + ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 0); + EXPECT_EQ (rgba[0], 0xdeadbeefu); +} + +TEST (ColorVectorOpsTests, ConvertARGBtoRGBASinglePixel) +{ + // 0xAARRGGBB -> 0xRRGGBBAA + const uint32_t argb[] = { 0xaabbccddu }; + uint32_t rgba[] = { 0u }; + ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 1); + EXPECT_EQ (rgba[0], 0xbbccddaau); +} + +TEST (ColorVectorOpsTests, ConvertARGBtoRGBAAllZero) +{ + const uint32_t argb[] = { 0x00000000u }; + uint32_t rgba[] = { 0xffffffffu }; + ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 1); + EXPECT_EQ (rgba[0], 0x00000000u); +} + +TEST (ColorVectorOpsTests, ConvertARGBtoRGBAAllOnes) +{ + const uint32_t argb[] = { 0xffffffffu }; + uint32_t rgba[] = { 0u }; + ColorVectorOperations::convertARGBtoRGBA (argb, rgba, 1); + EXPECT_EQ (rgba[0], 0xffffffffu); +} + +// ============================================================================== +// convertGrayscaleToRGBA tests +// ============================================================================== + TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBA) { const uint8 gray[] = { 0, 127, 255 }; @@ -98,6 +217,29 @@ TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBA) EXPECT_EQ (rgba[11], 255); } +TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBAAlwaysOpaqueAlpha) +{ + const uint8 gray[] = { 128 }; + uint8 rgba[4] = {}; + ColorVectorOperations::convertGrayscaleToRGBA (gray, rgba, 1); + EXPECT_EQ (rgba[0], 128); + EXPECT_EQ (rgba[1], 128); + EXPECT_EQ (rgba[2], 128); + EXPECT_EQ (rgba[3], 255); +} + +TEST (ColorVectorOpsTests, ConvertGrayscaleToRGBAZeroPixels) +{ + const uint8 gray[] = { 99 }; + uint8 rgba[4] = { 1, 2, 3, 4 }; + ColorVectorOperations::convertGrayscaleToRGBA (gray, rgba, 0); + EXPECT_EQ (rgba[0], 1); +} + +// ============================================================================== +// convertRGBToRGBA tests +// ============================================================================== + TEST (ColorVectorOpsTests, ConvertRGBToRGBA) { const uint8 rgb[] = { 1, 2, 3, 4, 5, 6 }; @@ -115,6 +257,29 @@ TEST (ColorVectorOpsTests, ConvertRGBToRGBA) EXPECT_EQ (rgba[7], 255); } +TEST (ColorVectorOpsTests, ConvertRGBToRGBAAlwaysOpaqueAlpha) +{ + const uint8 rgb[] = { 10, 20, 30 }; + uint8 rgba[4] = {}; + ColorVectorOperations::convertRGBToRGBA (rgb, rgba, 1); + EXPECT_EQ (rgba[0], 10); + EXPECT_EQ (rgba[1], 20); + EXPECT_EQ (rgba[2], 30); + EXPECT_EQ (rgba[3], 255); +} + +TEST (ColorVectorOpsTests, ConvertRGBToRGBAZeroPixels) +{ + const uint8 rgb[] = { 99, 98, 97 }; + uint8 rgba[4] = { 1, 2, 3, 4 }; + ColorVectorOperations::convertRGBToRGBA (rgb, rgba, 0); + EXPECT_EQ (rgba[0], 1); +} + +// ============================================================================== +// lerpRows tests +// ============================================================================== + TEST (ColorVectorOpsTests, LerpRowsMatchesScalarReference) { const float rowA[] = { 0.0f, 0.25f, 0.5f, 0.75f, 1.0f, 0.5f, 0.0f, 1.0f }; @@ -126,3 +291,67 @@ TEST (ColorVectorOpsTests, LerpRowsMatchesScalarReference) for (int i = 0; i < 8; ++i) EXPECT_NEAR (dst[i], rowA[i] + (rowB[i] - rowA[i]) * 0.25f, 1.0e-5f); } + +TEST (ColorVectorOpsTests, LerpRowsAtZeroGivesRowA) +{ + const float rowA[] = { 1.0f, 2.0f, 3.0f, 4.0f }; + const float rowB[] = { 5.0f, 6.0f, 7.0f, 8.0f }; + float dst[4] = {}; + + ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.0f, 1); + + for (int i = 0; i < 4; ++i) + EXPECT_NEAR (dst[i], rowA[i], 1.0e-5f); +} + +TEST (ColorVectorOpsTests, LerpRowsAtOneGivesRowB) +{ + const float rowA[] = { 1.0f, 2.0f, 3.0f, 4.0f }; + const float rowB[] = { 5.0f, 6.0f, 7.0f, 8.0f }; + float dst[4] = {}; + + ColorVectorOperations::lerpRows (rowA, rowB, dst, 1.0f, 1); + + for (int i = 0; i < 4; ++i) + EXPECT_NEAR (dst[i], rowB[i], 1.0e-5f); +} + +TEST (ColorVectorOpsTests, LerpRowsAtHalfIsMidpoint) +{ + const float rowA[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + const float rowB[] = { 1.0f, 1.0f, 1.0f, 1.0f }; + float dst[4] = {}; + + ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.5f, 1); + + for (int i = 0; i < 4; ++i) + EXPECT_NEAR (dst[i], 0.5f, 1.0e-5f); +} + +TEST (ColorVectorOpsTests, LerpRowsZeroPixels) +{ + const float rowA[] = { 1.0f, 1.0f, 1.0f, 1.0f }; + const float rowB[] = { 2.0f, 2.0f, 2.0f, 2.0f }; + float dst[4] = { 9.0f, 9.0f, 9.0f, 9.0f }; + + ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.5f, 0); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (dst[i], 9.0f); +} + +TEST (ColorVectorOpsTests, LerpRowsMultiplePixels) +{ + // 4 pixels = 16 floats + float rowA[16], rowB[16], dst[16]; + for (int i = 0; i < 16; ++i) + { + rowA[i] = (float) i; + rowB[i] = (float) (i + 16); + } + + ColorVectorOperations::lerpRows (rowA, rowB, dst, 0.5f, 4); + + for (int i = 0; i < 16; ++i) + EXPECT_NEAR (dst[i], rowA[i] + (rowB[i] - rowA[i]) * 0.5f, 1.0e-4f); +} diff --git a/tests/yup_simd/yup_ComplexVectorOperations.cpp b/tests/yup_simd/yup_ComplexVectorOperations.cpp index f4c578729..94527514e 100644 --- a/tests/yup_simd/yup_ComplexVectorOperations.cpp +++ b/tests/yup_simd/yup_ComplexVectorOperations.cpp @@ -23,6 +23,8 @@ #include +#include + using namespace yup; class ComplexVectorOperationsTests : public ::testing::Test @@ -41,6 +43,10 @@ class ComplexVectorOperationsTests : public ::testing::Test } }; +// ============================================================================== +// multiply tests +// ============================================================================== + TEST_F (ComplexVectorOperationsTests, MultiplyMatchesScalarReference) { constexpr int complexPairs = 7; @@ -57,6 +63,70 @@ TEST_F (ComplexVectorOperationsTests, MultiplyMatchesScalarReference) EXPECT_NEAR (actual[i], expected[i], 1.0e-5f); } +TEST_F (ComplexVectorOperationsTests, MultiplySinglePair) +{ + // (1 + 2i) * (3 + 4i) = (1*3 - 2*4) + (1*4 + 2*3)i = -5 + 10i + const float a[] = { 1.0f, 2.0f }; + const float b[] = { 3.0f, 4.0f }; + float result[2] = {}; + + ComplexVectorOperations::multiply (result, a, b, 1); + + EXPECT_NEAR (result[0], -5.0f, 1.0e-5f); + EXPECT_NEAR (result[1], 10.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, MultiplyByOne) +{ + // Multiplying by (1 + 0i) should leave values unchanged + const float a[] = { 3.0f, 4.0f, -1.0f, 2.0f }; + const float one[] = { 1.0f, 0.0f, 1.0f, 0.0f }; + float result[4] = {}; + + ComplexVectorOperations::multiply (result, a, one, 2); + + EXPECT_NEAR (result[0], a[0], 1.0e-5f); + EXPECT_NEAR (result[1], a[1], 1.0e-5f); + EXPECT_NEAR (result[2], a[2], 1.0e-5f); + EXPECT_NEAR (result[3], a[3], 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, MultiplyByConjugateGivesModulusSq) +{ + // (a + bi) * (a - bi) = a^2 + b^2 (real, zero imaginary) + const float a[] = { 3.0f, 4.0f }; + const float conj[] = { 3.0f, -4.0f }; + float result[2] = {}; + + ComplexVectorOperations::multiply (result, a, conj, 1); + + EXPECT_NEAR (result[0], 25.0f, 1.0e-5f); // 3^2 + 4^2 = 25 + EXPECT_NEAR (result[1], 0.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, MultiplyLargeBufferMatchesReference) +{ + constexpr int complexPairs = 20; + float a[complexPairs * 2], b[complexPairs * 2]; + float expected[complexPairs * 2], actual[complexPairs * 2]; + + for (int i = 0; i < complexPairs * 2; ++i) + { + a[i] = (float) (i - complexPairs) * 0.1f; + b[i] = (float) (complexPairs - i) * 0.2f; + } + + multiplyReference (expected, a, b, complexPairs); + ComplexVectorOperations::multiply (actual, a, b, complexPairs); + + for (int i = 0; i < complexPairs * 2; ++i) + EXPECT_NEAR (actual[i], expected[i], 1.0e-4f); +} + +// ============================================================================== +// multiplyAccumulate tests +// ============================================================================== + TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateMatchesScalarReference) { constexpr int complexPairs = 5; @@ -80,6 +150,66 @@ TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateMatchesScalarReference) EXPECT_NEAR (actual[i], expected[i], 1.0e-5f); } +TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateSinglePair) +{ + // Start with (10 + 5i), add (1+2i)*(3+4i) = -5+10i + // Result: (10-5) + (5+10)i = 5 + 15i + const float a[] = { 1.0f, 2.0f }; + const float b[] = { 3.0f, 4.0f }; + float y[] = { 10.0f, 5.0f }; + + ComplexVectorOperations::multiplyAccumulate (a, b, y, 1); + + EXPECT_NEAR (y[0], 5.0f, 1.0e-5f); + EXPECT_NEAR (y[1], 15.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateWithZeroInputLeavesDestUnchanged) +{ + // a = (0 + 0i), any b: product is (0 + 0i) -> dest unchanged + const float a[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + const float b[] = { 3.0f, 4.0f, 1.0f, 2.0f }; + float y[] = { 7.0f, 8.0f, 9.0f, 10.0f }; + + ComplexVectorOperations::multiplyAccumulate (a, b, y, 2); + + EXPECT_NEAR (y[0], 7.0f, 1.0e-5f); + EXPECT_NEAR (y[1], 8.0f, 1.0e-5f); + EXPECT_NEAR (y[2], 9.0f, 1.0e-5f); + EXPECT_NEAR (y[3], 10.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, MultiplyAccumulateLargeBuffer) +{ + constexpr int complexPairs = 16; + float a[complexPairs * 2], b[complexPairs * 2]; + float initY[complexPairs * 2]; + float expected[complexPairs * 2], actual[complexPairs * 2]; + + for (int i = 0; i < complexPairs * 2; ++i) + { + a[i] = (float) i * 0.1f; + b[i] = (float) (complexPairs * 2 - i) * 0.1f; + initY[i] = (float) i * 0.5f; + expected[i] = initY[i]; + actual[i] = initY[i]; + } + + float product[complexPairs * 2] = {}; + multiplyReference (product, a, b, complexPairs); + for (int i = 0; i < complexPairs * 2; ++i) + expected[i] += product[i]; + + ComplexVectorOperations::multiplyAccumulate (a, b, actual, complexPairs); + + for (int i = 0; i < complexPairs * 2; ++i) + EXPECT_NEAR (actual[i], expected[i], 1.0e-4f); +} + +// ============================================================================== +// powerSpectrum tests +// ============================================================================== + TEST_F (ComplexVectorOperationsTests, PowerSpectrumMatchesScalarReference) { constexpr int complexPairs = 6; @@ -91,3 +221,71 @@ TEST_F (ComplexVectorOperationsTests, PowerSpectrumMatchesScalarReference) for (int i = 0; i < complexPairs; ++i) EXPECT_NEAR (actual[i], data[i * 2] * data[i * 2] + data[i * 2 + 1] * data[i * 2 + 1], 1.0e-5f); } + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumSinglePair) +{ + // (3, 4): magnitude^2 = 9 + 16 = 25 + const float data[] = { 3.0f, 4.0f }; + float result = 0.0f; + + ComplexVectorOperations::powerSpectrum (&result, data, 1); + + EXPECT_NEAR (result, 25.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumZeroPairs) +{ + const float data[] = { 3.0f, 4.0f }; + float result = 99.0f; + + ComplexVectorOperations::powerSpectrum (&result, data, 0); + + // Should not have written anything + EXPECT_FLOAT_EQ (result, 99.0f); +} + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumUnitVector) +{ + // (1 + 0i): power = 1 + const float data[] = { 1.0f, 0.0f }; + float result = 0.0f; + ComplexVectorOperations::powerSpectrum (&result, data, 1); + EXPECT_NEAR (result, 1.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumZeroVector) +{ + // (0 + 0i): power = 0 + const float data[] = { 0.0f, 0.0f }; + float result = 99.0f; + ComplexVectorOperations::powerSpectrum (&result, data, 1); + EXPECT_NEAR (result, 0.0f, 1.0e-5f); +} + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumLargeBuffer) +{ + constexpr int complexPairs = 20; + float data[complexPairs * 2]; + float actual[complexPairs] = {}; + + for (int i = 0; i < complexPairs * 2; ++i) + data[i] = (float) (i + 1) * 0.5f; + + ComplexVectorOperations::powerSpectrum (actual, data, complexPairs); + + for (int i = 0; i < complexPairs; ++i) + EXPECT_NEAR (actual[i], data[i * 2] * data[i * 2] + data[i * 2 + 1] * data[i * 2 + 1], 1.0e-4f); +} + +TEST_F (ComplexVectorOperationsTests, PowerSpectrumPythagoreanTriple) +{ + // (3, 4): 3^2+4^2=25, (5, 12): 5^2+12^2=169, (8, 15): 8^2+15^2=289 + const float data[] = { 3.0f, 4.0f, 5.0f, 12.0f, 8.0f, 15.0f }; + float actual[3] = {}; + + ComplexVectorOperations::powerSpectrum (actual, data, 3); + + EXPECT_NEAR (actual[0], 25.0f, 1.0e-4f); + EXPECT_NEAR (actual[1], 169.0f, 1.0e-4f); + EXPECT_NEAR (actual[2], 289.0f, 1.0e-4f); +} diff --git a/tests/yup_simd/yup_SIMDRegister.cpp b/tests/yup_simd/yup_SIMDRegister.cpp index 3eb2b4d60..43ea61de3 100644 --- a/tests/yup_simd/yup_SIMDRegister.cpp +++ b/tests/yup_simd/yup_SIMDRegister.cpp @@ -25,6 +25,51 @@ using namespace yup; +// ============================================================================== +// Float4 tests +// ============================================================================== + +TEST (SIMDRegisterTests, Float4DefaultConstructorIsZero) +{ + Float4 r; + float stored[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; + r.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], 0.0f); +} + +TEST (SIMDRegisterTests, Float4ZeroHelperIsAllZero) +{ + const auto r = Float4::zero(); + float stored[4] = {}; + r.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], 0.0f); +} + +TEST (SIMDRegisterTests, Float4BroadcastFillsAllLanes) +{ + const auto r = Float4::broadcast (3.14f); + float stored[4] = {}; + r.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], 3.14f); +} + +TEST (SIMDRegisterTests, Float4ElementAccessOperator) +{ + const float values[4] = { 10.0f, 20.0f, 30.0f, 40.0f }; + const auto r = Float4::loadUnaligned (values); + + EXPECT_FLOAT_EQ (r[0], 10.0f); + EXPECT_FLOAT_EQ (r[1], 20.0f); + EXPECT_FLOAT_EQ (r[2], 30.0f); + EXPECT_FLOAT_EQ (r[3], 40.0f); +} + TEST (SIMDRegisterTests, Float4ArithmeticAndHorizontalOps) { const float aValues[4] = { 1.0f, -2.0f, 3.0f, -4.0f }; @@ -44,6 +89,138 @@ TEST (SIMDRegisterTests, Float4ArithmeticAndHorizontalOps) EXPECT_FLOAT_EQ (a.abs().hmax(), 4.0f); } +TEST (SIMDRegisterTests, Float4Subtraction) +{ + const float aValues[4] = { 10.0f, 20.0f, 30.0f, 40.0f }; + const float bValues[4] = { 1.0f, 3.0f, 5.0f, 7.0f }; + + const auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + const auto result = a - b; + + float stored[4] = {}; + result.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], aValues[i] - bValues[i]); +} + +TEST (SIMDRegisterTests, Float4Division) +{ + const float aValues[4] = { 4.0f, 9.0f, 16.0f, 25.0f }; + const float bValues[4] = { 2.0f, 3.0f, 4.0f, 5.0f }; + + const auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + const auto result = a / b; + + float stored[4] = {}; + result.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], aValues[i] / bValues[i]); +} + +TEST (SIMDRegisterTests, Float4CompoundAddAssign) +{ + const float aValues[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; + const float bValues[4] = { 10.0f, 20.0f, 30.0f, 40.0f }; + + auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + a += b; + + float stored[4] = {}; + a.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], aValues[i] + bValues[i]); +} + +TEST (SIMDRegisterTests, Float4CompoundMulAssign) +{ + const float aValues[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; + const float bValues[4] = { 2.0f, 3.0f, 4.0f, 5.0f }; + + auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + a *= b; + + float stored[4] = {}; + a.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], aValues[i] * bValues[i]); +} + +TEST (SIMDRegisterTests, Float4ElementwiseMin) +{ + const float aValues[4] = { 1.0f, 5.0f, 2.0f, 4.0f }; + const float bValues[4] = { 3.0f, 2.0f, 4.0f, 1.0f }; + + const auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + const auto result = a.min (b); + + float stored[4] = {}; + result.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], std::min (aValues[i], bValues[i])); +} + +TEST (SIMDRegisterTests, Float4ElementwiseMax) +{ + const float aValues[4] = { 1.0f, 5.0f, 2.0f, 4.0f }; + const float bValues[4] = { 3.0f, 2.0f, 4.0f, 1.0f }; + + const auto a = Float4::loadUnaligned (aValues); + const auto b = Float4::loadUnaligned (bValues); + const auto result = a.max (b); + + float stored[4] = {}; + result.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], std::max (aValues[i], bValues[i])); +} + +TEST (SIMDRegisterTests, Float4AbsOnMixedValues) +{ + const float values[4] = { -1.0f, 2.0f, -3.0f, 4.0f }; + const auto r = Float4::loadUnaligned (values); + const auto result = r.abs(); + + float stored[4] = {}; + result.storeUnaligned (stored); + + EXPECT_FLOAT_EQ (stored[0], 1.0f); + EXPECT_FLOAT_EQ (stored[1], 2.0f); + EXPECT_FLOAT_EQ (stored[2], 3.0f); + EXPECT_FLOAT_EQ (stored[3], 4.0f); +} + +TEST (SIMDRegisterTests, Float4SumAllLanes) +{ + const float values[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; + const auto r = Float4::loadUnaligned (values); + EXPECT_FLOAT_EQ (r.sum(), 10.0f); +} + +TEST (SIMDRegisterTests, Float4SumWithNegatives) +{ + const float values[4] = { 1.0f, -1.0f, 2.0f, -2.0f }; + const auto r = Float4::loadUnaligned (values); + EXPECT_FLOAT_EQ (r.sum(), 0.0f); +} + +TEST (SIMDRegisterTests, Float4HmaxFindsLargest) +{ + const float values[4] = { -3.0f, 7.0f, 1.0f, -10.0f }; + const auto r = Float4::loadUnaligned (values); + EXPECT_FLOAT_EQ (r.hmax(), 7.0f); +} + TEST (SIMDRegisterTests, MulAddAndLoadStoreRoundTrip) { alignas (16) const float base[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; @@ -57,3 +234,217 @@ TEST (SIMDRegisterTests, MulAddAndLoadStoreRoundTrip) for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ (stored[i], base[i] + mul[i] * add[i]); } + +TEST (SIMDRegisterTests, Float4LoadFromPointerConstructor) +{ + const float values[4] = { 5.0f, 6.0f, 7.0f, 8.0f }; + const Float4 r (values); + + float stored[4] = {}; + r.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (stored[i], values[i]); +} + +TEST (SIMDRegisterTests, Float4ScalarConstructorBroadcasts) +{ + const Float4 r (42.0f); + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (r[i], 42.0f); +} + +// ============================================================================== +// Float8 tests +// ============================================================================== + +TEST (SIMDRegisterTests, Float8DefaultConstructorIsZero) +{ + Float8 r; + for (int i = 0; i < 8; ++i) + EXPECT_FLOAT_EQ (r[i], 0.0f); +} + +TEST (SIMDRegisterTests, Float8BroadcastAndArithmetic) +{ + const Float8 a = Float8::broadcast (2.0f); + const Float8 b = Float8::broadcast (3.0f); + const auto result = a * b; + + for (int i = 0; i < 8; ++i) + EXPECT_FLOAT_EQ (result[i], 6.0f); +} + +TEST (SIMDRegisterTests, Float8LoadStoreRoundTrip) +{ + float values[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + const auto r = Float8::loadUnaligned (values); + + float stored[8] = {}; + r.storeUnaligned (stored); + + for (int i = 0; i < 8; ++i) + EXPECT_FLOAT_EQ (stored[i], values[i]); +} + +TEST (SIMDRegisterTests, Float8Sum) +{ + float values[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + const auto r = Float8::loadUnaligned (values); + EXPECT_FLOAT_EQ (r.sum(), 36.0f); +} + +TEST (SIMDRegisterTests, Float8Hmax) +{ + float values[8] = { 1.0f, -5.0f, 3.0f, 9.0f, 2.0f, -3.0f, 4.0f, 0.0f }; + const auto r = Float8::loadUnaligned (values); + EXPECT_FLOAT_EQ (r.hmax(), 9.0f); +} + +TEST (SIMDRegisterTests, Float8AbsNegatesAll) +{ + float values[8]; + for (int i = 0; i < 8; ++i) + values[i] = (i % 2 == 0) ? -(float) (i + 1) : (float) (i + 1); + + const auto r = Float8::loadUnaligned (values).abs(); + for (int i = 0; i < 8; ++i) + EXPECT_FLOAT_EQ (r[i], (float) (i + 1)); +} + +// ============================================================================== +// Double2 tests +// ============================================================================== + +TEST (SIMDRegisterTests, Double2DefaultConstructorIsZero) +{ + Double2 r; + for (int i = 0; i < 2; ++i) + EXPECT_DOUBLE_EQ (r[i], 0.0); +} + +TEST (SIMDRegisterTests, Double2ArithmeticOperations) +{ + const double aValues[2] = { 1.5, -2.5 }; + const double bValues[2] = { 3.0, 4.0 }; + + const auto a = Double2::loadUnaligned (aValues); + const auto b = Double2::loadUnaligned (bValues); + + const auto sum = a + b; + const auto diff = a - b; + const auto prod = a * b; + const auto quot = a / b; + + EXPECT_DOUBLE_EQ (sum[0], aValues[0] + bValues[0]); + EXPECT_DOUBLE_EQ (sum[1], aValues[1] + bValues[1]); + EXPECT_DOUBLE_EQ (diff[0], aValues[0] - bValues[0]); + EXPECT_DOUBLE_EQ (diff[1], aValues[1] - bValues[1]); + EXPECT_DOUBLE_EQ (prod[0], aValues[0] * bValues[0]); + EXPECT_DOUBLE_EQ (prod[1], aValues[1] * bValues[1]); + EXPECT_DOUBLE_EQ (quot[0], aValues[0] / bValues[0]); + EXPECT_DOUBLE_EQ (quot[1], aValues[1] / bValues[1]); +} + +TEST (SIMDRegisterTests, Double2BroadcastAndSum) +{ + const auto r = Double2::broadcast (3.14); + EXPECT_NEAR (r.sum(), 6.28, 1.0e-12); +} + +TEST (SIMDRegisterTests, Double2MinMax) +{ + const double aValues[2] = { 1.0, 5.0 }; + const double bValues[2] = { 3.0, 2.0 }; + + const auto a = Double2::loadUnaligned (aValues); + const auto b = Double2::loadUnaligned (bValues); + + const auto minResult = a.min (b); + const auto maxResult = a.max (b); + + EXPECT_DOUBLE_EQ (minResult[0], 1.0); + EXPECT_DOUBLE_EQ (minResult[1], 2.0); + EXPECT_DOUBLE_EQ (maxResult[0], 3.0); + EXPECT_DOUBLE_EQ (maxResult[1], 5.0); +} + +// ============================================================================== +// Double4 tests +// ============================================================================== + +TEST (SIMDRegisterTests, Double4LoadStoreRoundTrip) +{ + const double values[4] = { 1.1, 2.2, 3.3, 4.4 }; + const auto r = Double4::loadUnaligned (values); + + double stored[4] = {}; + r.storeUnaligned (stored); + + for (int i = 0; i < 4; ++i) + EXPECT_DOUBLE_EQ (stored[i], values[i]); +} + +TEST (SIMDRegisterTests, Double4MulAdd) +{ + const double baseValues[4] = { 1.0, 2.0, 3.0, 4.0 }; + const double mulValues[4] = { 2.0, 3.0, 4.0, 5.0 }; + const double addValues[4] = { 10.0, 20.0, 30.0, 40.0 }; + + const auto base = Double4::loadUnaligned (baseValues); + const auto mul = Double4::loadUnaligned (mulValues); + const auto add = Double4::loadUnaligned (addValues); + const auto result = base.mulAdd (mul, add); + + for (int i = 0; i < 4; ++i) + EXPECT_DOUBLE_EQ (result[i], baseValues[i] + mulValues[i] * addValues[i]); +} + +TEST (SIMDRegisterTests, Double4Sum) +{ + const double values[4] = { 1.0, 2.0, 3.0, 4.0 }; + const auto r = Double4::loadUnaligned (values); + EXPECT_DOUBLE_EQ (r.sum(), 10.0); +} + +TEST (SIMDRegisterTests, Double4Hmax) +{ + const double values[4] = { -1.0, 3.5, 2.0, -4.0 }; + const auto r = Double4::loadUnaligned (values); + EXPECT_DOUBLE_EQ (r.hmax(), 3.5); +} + +// ============================================================================== +// General SIMDRegister with non-power-of-two N +// ============================================================================== + +TEST (SIMDRegisterTests, SIMDRegisterFloat5PartialBatch) +{ + using Float5 = SIMDRegister; + const float values[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + const auto r = Float5::loadUnaligned (values); + + float stored[5] = {}; + r.storeUnaligned (stored); + + for (int i = 0; i < 5; ++i) + EXPECT_FLOAT_EQ (stored[i], values[i]); +} + +TEST (SIMDRegisterTests, SIMDRegisterFloat5Sum) +{ + using Float5 = SIMDRegister; + const float values[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + const auto r = Float5::loadUnaligned (values); + EXPECT_FLOAT_EQ (r.sum(), 15.0f); +} + +TEST (SIMDRegisterTests, SIMDRegisterFloat1ElementAccess) +{ + using Float1 = SIMDRegister; + const float value = 7.0f; + const Float1 r (value); + EXPECT_FLOAT_EQ (r[0], value); + EXPECT_FLOAT_EQ (r.sum(), value); + EXPECT_FLOAT_EQ (r.hmax(), value); +} diff --git a/tests/yup_simd/yup_Vec.cpp b/tests/yup_simd/yup_Vec.cpp new file mode 100644 index 000000000..61ad33e0d --- /dev/null +++ b/tests/yup_simd/yup_Vec.cpp @@ -0,0 +1,358 @@ +/* + ============================================================================== + + This file is part of the YUP library. + Copyright (c) 2026 - kunitoki@gmail.com + + YUP is an open source library subject to open-source licensing. + + The code included in this file is provided under the terms of the ISC license + http://www.isc.org/downloads/software-support-policy/isc-license. Permission + to use, copy, modify, and/or distribute this software for any purpose with or + without fee is hereby granted provided that the above copyright notice and + this permission notice appear in all copies. + + YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER + EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE + DISCLAIMED. + + ============================================================================== +*/ + +#include + +#include + +#include + +using namespace yup; + +// ============================================================================== +// Vec2f tests +// ============================================================================== + +TEST (Vec2fTests, DefaultConstructorZeroInitialises) +{ + const Vec2f v; + EXPECT_FLOAT_EQ (v.x, 0.0f); + EXPECT_FLOAT_EQ (v.y, 0.0f); +} + +TEST (Vec2fTests, ParameterisedConstructor) +{ + const Vec2f v (3.0f, -7.5f); + EXPECT_FLOAT_EQ (v.x, 3.0f); + EXPECT_FLOAT_EQ (v.y, -7.5f); +} + +TEST (Vec2fTests, Addition) +{ + const Vec2f a (1.0f, 2.0f); + const Vec2f b (3.0f, 4.0f); + const Vec2f r = a + b; + EXPECT_FLOAT_EQ (r.x, 4.0f); + EXPECT_FLOAT_EQ (r.y, 6.0f); +} + +TEST (Vec2fTests, AdditionWithNegatives) +{ + const Vec2f a (5.0f, -3.0f); + const Vec2f b (-2.0f, 7.0f); + const Vec2f r = a + b; + EXPECT_FLOAT_EQ (r.x, 3.0f); + EXPECT_FLOAT_EQ (r.y, 4.0f); +} + +TEST (Vec2fTests, AdditionIdentity) +{ + const Vec2f a (10.0f, -5.0f); + const Vec2f zero; + const Vec2f r = a + zero; + EXPECT_FLOAT_EQ (r.x, a.x); + EXPECT_FLOAT_EQ (r.y, a.y); +} + +TEST (Vec2fTests, ScalarMultiplication) +{ + const Vec2f v (2.0f, -4.0f); + const Vec2f r = v * 3.0f; + EXPECT_FLOAT_EQ (r.x, 6.0f); + EXPECT_FLOAT_EQ (r.y, -12.0f); +} + +TEST (Vec2fTests, ScalarMultiplicationByZero) +{ + const Vec2f v (99.0f, -77.0f); + const Vec2f r = v * 0.0f; + EXPECT_FLOAT_EQ (r.x, 0.0f); + EXPECT_FLOAT_EQ (r.y, 0.0f); +} + +TEST (Vec2fTests, ScalarMultiplicationByOne) +{ + const Vec2f v (3.5f, -1.5f); + const Vec2f r = v * 1.0f; + EXPECT_FLOAT_EQ (r.x, v.x); + EXPECT_FLOAT_EQ (r.y, v.y); +} + +TEST (Vec2fTests, DotProduct) +{ + const Vec2f a (1.0f, 0.0f); + const Vec2f b (0.0f, 1.0f); + EXPECT_FLOAT_EQ (a.dot (b), 0.0f); // orthogonal +} + +TEST (Vec2fTests, DotProductCollinear) +{ + const Vec2f a (1.0f, 0.0f); + EXPECT_FLOAT_EQ (a.dot (a), 1.0f); +} + +TEST (Vec2fTests, DotProductGeneral) +{ + // (1,2)ยท(3,4) = 1*3 + 2*4 = 11 + const Vec2f a (1.0f, 2.0f); + const Vec2f b (3.0f, 4.0f); + EXPECT_FLOAT_EQ (a.dot (b), 11.0f); +} + +TEST (Vec2fTests, LengthOf345Triangle) +{ + const Vec2f v (3.0f, 4.0f); + EXPECT_NEAR (v.length(), 5.0f, 1.0e-5f); +} + +TEST (Vec2fTests, LengthOfZeroVector) +{ + const Vec2f v; + EXPECT_NEAR (v.length(), 0.0f, 1.0e-7f); +} + +TEST (Vec2fTests, LengthOfUnitVector) +{ + const Vec2f v (1.0f, 0.0f); + EXPECT_NEAR (v.length(), 1.0f, 1.0e-7f); +} + +TEST (Vec2fTests, NormalizedHasUnitLength) +{ + const Vec2f v (3.0f, 4.0f); + const Vec2f n = v.normalized(); + EXPECT_NEAR (n.length(), 1.0f, 1.0e-6f); +} + +TEST (Vec2fTests, NormalizedDirection) +{ + const Vec2f v (3.0f, 4.0f); + const Vec2f n = v.normalized(); + EXPECT_NEAR (n.x, 0.6f, 1.0e-6f); + EXPECT_NEAR (n.y, 0.8f, 1.0e-6f); +} + +TEST (Vec2fTests, NormalizedZeroVectorReturnsZero) +{ + const Vec2f v; + const Vec2f n = v.normalized(); + EXPECT_FLOAT_EQ (n.x, 0.0f); + EXPECT_FLOAT_EQ (n.y, 0.0f); +} + +// ============================================================================== +// Vec4f tests +// ============================================================================== + +TEST (Vec4fTests, DefaultConstructorZeroInitialises) +{ + const Vec4f v; + EXPECT_FLOAT_EQ (v.r, 0.0f); + EXPECT_FLOAT_EQ (v.g, 0.0f); + EXPECT_FLOAT_EQ (v.b, 0.0f); + EXPECT_FLOAT_EQ (v.a, 0.0f); +} + +TEST (Vec4fTests, ParameterisedConstructor) +{ + const Vec4f v (0.1f, 0.2f, 0.3f, 1.0f); + EXPECT_FLOAT_EQ (v.r, 0.1f); + EXPECT_FLOAT_EQ (v.g, 0.2f); + EXPECT_FLOAT_EQ (v.b, 0.3f); + EXPECT_FLOAT_EQ (v.a, 1.0f); +} + +TEST (Vec4fTests, Addition) +{ + const Vec4f a (0.1f, 0.2f, 0.3f, 0.4f); + const Vec4f b (0.4f, 0.3f, 0.2f, 0.1f); + const Vec4f r = a + b; + EXPECT_NEAR (r.r, 0.5f, 1.0e-6f); + EXPECT_NEAR (r.g, 0.5f, 1.0e-6f); + EXPECT_NEAR (r.b, 0.5f, 1.0e-6f); + EXPECT_NEAR (r.a, 0.5f, 1.0e-6f); +} + +TEST (Vec4fTests, AdditionIdentity) +{ + const Vec4f a (1.0f, 2.0f, 3.0f, 4.0f); + const Vec4f zero; + const Vec4f r = a + zero; + EXPECT_NEAR (r.r, a.r, 1.0e-6f); + EXPECT_NEAR (r.g, a.g, 1.0e-6f); + EXPECT_NEAR (r.b, a.b, 1.0e-6f); + EXPECT_NEAR (r.a, a.a, 1.0e-6f); +} + +TEST (Vec4fTests, ScalarMultiplication) +{ + const Vec4f v (1.0f, 2.0f, 3.0f, 4.0f); + const Vec4f r = v * 2.0f; + EXPECT_NEAR (r.r, 2.0f, 1.0e-6f); + EXPECT_NEAR (r.g, 4.0f, 1.0e-6f); + EXPECT_NEAR (r.b, 6.0f, 1.0e-6f); + EXPECT_NEAR (r.a, 8.0f, 1.0e-6f); +} + +TEST (Vec4fTests, ScalarMultiplicationByZero) +{ + const Vec4f v (9.0f, 8.0f, 7.0f, 6.0f); + const Vec4f r = v * 0.0f; + EXPECT_NEAR (r.r, 0.0f, 1.0e-6f); + EXPECT_NEAR (r.g, 0.0f, 1.0e-6f); + EXPECT_NEAR (r.b, 0.0f, 1.0e-6f); + EXPECT_NEAR (r.a, 0.0f, 1.0e-6f); +} + +TEST (Vec4fTests, ScalarMultiplicationByOne) +{ + const Vec4f v (5.0f, 6.0f, 7.0f, 8.0f); + const Vec4f r = v * 1.0f; + EXPECT_NEAR (r.r, v.r, 1.0e-6f); + EXPECT_NEAR (r.g, v.g, 1.0e-6f); + EXPECT_NEAR (r.b, v.b, 1.0e-6f); + EXPECT_NEAR (r.a, v.a, 1.0e-6f); +} + +TEST (Vec4fTests, ElementWiseMultiplication) +{ + const Vec4f a (2.0f, 3.0f, 4.0f, 5.0f); + const Vec4f b (1.5f, 2.0f, 0.5f, 0.25f); + const Vec4f r = a * b; + EXPECT_NEAR (r.r, 3.0f, 1.0e-6f); + EXPECT_NEAR (r.g, 6.0f, 1.0e-6f); + EXPECT_NEAR (r.b, 2.0f, 1.0e-6f); + EXPECT_NEAR (r.a, 1.25f, 1.0e-6f); +} + +TEST (Vec4fTests, ElementWiseMultiplicationByZeroVec) +{ + const Vec4f a (3.0f, 4.0f, 5.0f, 6.0f); + const Vec4f zero; + const Vec4f r = a * zero; + EXPECT_NEAR (r.r, 0.0f, 1.0e-6f); + EXPECT_NEAR (r.g, 0.0f, 1.0e-6f); + EXPECT_NEAR (r.b, 0.0f, 1.0e-6f); + EXPECT_NEAR (r.a, 0.0f, 1.0e-6f); +} + +TEST (Vec4fTests, LerpAtZeroReturnsThis) +{ + const Vec4f a (0.0f, 0.0f, 0.0f, 1.0f); + const Vec4f b (1.0f, 1.0f, 1.0f, 1.0f); + const Vec4f r = a.lerp (b, 0.0f); + EXPECT_NEAR (r.r, a.r, 1.0e-5f); + EXPECT_NEAR (r.g, a.g, 1.0e-5f); + EXPECT_NEAR (r.b, a.b, 1.0e-5f); + EXPECT_NEAR (r.a, a.a, 1.0e-5f); +} + +TEST (Vec4fTests, LerpAtOneReturnsOther) +{ + const Vec4f a (0.0f, 0.0f, 0.0f, 1.0f); + const Vec4f b (1.0f, 0.5f, 0.25f, 0.75f); + const Vec4f r = a.lerp (b, 1.0f); + EXPECT_NEAR (r.r, b.r, 1.0e-5f); + EXPECT_NEAR (r.g, b.g, 1.0e-5f); + EXPECT_NEAR (r.b, b.b, 1.0e-5f); + EXPECT_NEAR (r.a, b.a, 1.0e-5f); +} + +TEST (Vec4fTests, LerpAtHalfIsMidpoint) +{ + const Vec4f a (0.0f, 0.0f, 0.0f, 0.0f); + const Vec4f b (2.0f, 4.0f, 6.0f, 8.0f); + const Vec4f r = a.lerp (b, 0.5f); + EXPECT_NEAR (r.r, 1.0f, 1.0e-5f); + EXPECT_NEAR (r.g, 2.0f, 1.0e-5f); + EXPECT_NEAR (r.b, 3.0f, 1.0e-5f); + EXPECT_NEAR (r.a, 4.0f, 1.0e-5f); +} + +TEST (Vec4fTests, LerpQuarterPoint) +{ + const Vec4f a (0.0f, 0.0f, 0.0f, 0.0f); + const Vec4f b (4.0f, 8.0f, 12.0f, 16.0f); + const Vec4f r = a.lerp (b, 0.25f); + EXPECT_NEAR (r.r, 1.0f, 1.0e-5f); + EXPECT_NEAR (r.g, 2.0f, 1.0e-5f); + EXPECT_NEAR (r.b, 3.0f, 1.0e-5f); + EXPECT_NEAR (r.a, 4.0f, 1.0e-5f); +} + +TEST (Vec4fTests, PremultipliedFullAlpha) +{ + // alpha=1: RGB unchanged + const Vec4f v (0.5f, 0.25f, 0.75f, 1.0f); + const Vec4f p = v.premultiplied(); + EXPECT_NEAR (p.r, 0.5f, 1.0e-6f); + EXPECT_NEAR (p.g, 0.25f, 1.0e-6f); + EXPECT_NEAR (p.b, 0.75f, 1.0e-6f); + EXPECT_NEAR (p.a, 1.0f, 1.0e-6f); +} + +TEST (Vec4fTests, PremultipliedZeroAlpha) +{ + // alpha=0: all channels become 0 + const Vec4f v (1.0f, 1.0f, 1.0f, 0.0f); + const Vec4f p = v.premultiplied(); + EXPECT_NEAR (p.r, 0.0f, 1.0e-6f); + EXPECT_NEAR (p.g, 0.0f, 1.0e-6f); + EXPECT_NEAR (p.b, 0.0f, 1.0e-6f); + EXPECT_NEAR (p.a, 0.0f, 1.0e-6f); +} + +TEST (Vec4fTests, PremultipliedHalfAlpha) +{ + // alpha=0.5: each channel halved + const Vec4f v (0.8f, 0.6f, 0.4f, 0.5f); + const Vec4f p = v.premultiplied(); + EXPECT_NEAR (p.r, 0.4f, 1.0e-6f); + EXPECT_NEAR (p.g, 0.3f, 1.0e-6f); + EXPECT_NEAR (p.b, 0.2f, 1.0e-6f); + EXPECT_NEAR (p.a, 0.5f, 1.0e-6f); +} + +TEST (Vec4fTests, LoadAndStore) +{ + const float src[] = { 1.0f, 2.0f, 3.0f, 4.0f }; + const Vec4f v = Vec4f::load (src); + EXPECT_FLOAT_EQ (v.r, 1.0f); + EXPECT_FLOAT_EQ (v.g, 2.0f); + EXPECT_FLOAT_EQ (v.b, 3.0f); + EXPECT_FLOAT_EQ (v.a, 4.0f); + + float dst[4] = {}; + v.store (dst); + EXPECT_FLOAT_EQ (dst[0], 1.0f); + EXPECT_FLOAT_EQ (dst[1], 2.0f); + EXPECT_FLOAT_EQ (dst[2], 3.0f); + EXPECT_FLOAT_EQ (dst[3], 4.0f); +} + +TEST (Vec4fTests, LoadRoundTrip) +{ + const float original[] = { 7.5f, -3.25f, 0.0f, 1.0f }; + float result[4] = {}; + Vec4f::load (original).store (result); + for (int i = 0; i < 4; ++i) + EXPECT_FLOAT_EQ (result[i], original[i]); +} From abb2c637f54b6586ae19d97773dc7df03e7293f6 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Sat, 30 May 2026 08:52:40 +0200 Subject: [PATCH 4/5] Fix issue --- tests/yup_simd/yup_FloatVectorOperations.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/yup_simd/yup_FloatVectorOperations.cpp b/tests/yup_simd/yup_FloatVectorOperations.cpp index 748a81a8d..13219d79a 100644 --- a/tests/yup_simd/yup_FloatVectorOperations.cpp +++ b/tests/yup_simd/yup_FloatVectorOperations.cpp @@ -462,6 +462,7 @@ TEST_F (FloatVectorOperationsTests, FloatToDoubleAndBack) TEST_F (FloatVectorOperationsTests, MidSideEncodeDecodeRoundTrip) { Random& random = Random::getSystemRandom(); + constexpr float roundTripTolerance = 1.0e-4f; for (int i = 500; --i >= 0;) { @@ -503,8 +504,8 @@ TEST_F (FloatVectorOperationsTests, MidSideEncodeDecodeRoundTrip) for (int j = 0; j < num; ++j) { - EXPECT_NEAR (leftOut[j], left[j], std::numeric_limits::epsilon()); - EXPECT_NEAR (rightOut[j], right[j], std::numeric_limits::epsilon()); + EXPECT_NEAR (leftOut[j], left[j], roundTripTolerance); + EXPECT_NEAR (rightOut[j], right[j], roundTripTolerance); } } } From 8d19bd3b30f3056680ea417b1a813c33aad2b086 Mon Sep 17 00:00:00 2001 From: kunitoki Date: Sat, 30 May 2026 21:47:10 +0200 Subject: [PATCH 5/5] More numeric stability --- .../buffers/yup_ComplexVectorOperations.cpp | 36 ++++++++++--------- tests/yup_dsp/yup_BiquadCascade.cpp | 6 ++-- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp index b80fcc185..bd51937a7 100644 --- a/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp +++ b/modules/yup_simd/buffers/yup_ComplexVectorOperations.cpp @@ -30,20 +30,22 @@ void complexMultiply (const float* __restrict a, const float* __restrict b, floa #if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS constexpr int simdWidth = 4; + const __m256 signs = _mm256_set_ps (1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f); + for (; i <= complexPairs - simdWidth; i += simdWidth) { const int idx = i * 2; - __m256 av = _mm256_loadu_ps (a + idx); - __m256 bv = _mm256_loadu_ps (b + idx); - - const __m256 aShuffled = _mm256_permute_ps (av, _MM_SHUFFLE (2, 3, 0, 1)); - const __m256 bShuffled = _mm256_permute_ps (bv, _MM_SHUFFLE (2, 3, 0, 1)); + const __m256 av = _mm256_loadu_ps (a + idx); + const __m256 bv = _mm256_loadu_ps (b + idx); - __m256 realPart = _mm256_fmsub_ps (av, bv, _mm256_mul_ps (aShuffled, bShuffled)); - __m256 imagPart = _mm256_fmadd_ps (av, bShuffled, _mm256_mul_ps (aShuffled, bv)); + const __m256 aReal = _mm256_permute_ps (av, _MM_SHUFFLE (2, 2, 0, 0)); + const __m256 aImag = _mm256_permute_ps (av, _MM_SHUFFLE (3, 3, 1, 1)); + const __m256 bSwapped = _mm256_permute_ps (bv, _MM_SHUFFLE (2, 3, 0, 1)); + const __m256 realProducts = _mm256_mul_ps (aReal, bv); + const __m256 imagProducts = _mm256_mul_ps (aImag, bSwapped); - __m256 interleaved = _mm256_blend_ps (realPart, imagPart, 0b10101010); + __m256 interleaved = _mm256_fmadd_ps (imagProducts, signs, realProducts); if (accumulate) interleaved = _mm256_add_ps (_mm256_loadu_ps (y + idx), interleaved); @@ -53,20 +55,22 @@ void complexMultiply (const float* __restrict a, const float* __restrict b, floa #elif YUP_USE_SSE_INTRINSICS constexpr int simdWidth = 2; + const __m128 signs = _mm_set_ps (1.0f, -1.0f, 1.0f, -1.0f); + for (; i <= complexPairs - simdWidth; i += simdWidth) { const int idx = i * 2; - __m128 av = _mm_loadu_ps (a + idx); - __m128 bv = _mm_loadu_ps (b + idx); - - const __m128 aShuffled = _mm_shuffle_ps (av, av, _MM_SHUFFLE (2, 3, 0, 1)); - const __m128 bShuffled = _mm_shuffle_ps (bv, bv, _MM_SHUFFLE (2, 3, 0, 1)); + const __m128 av = _mm_loadu_ps (a + idx); + const __m128 bv = _mm_loadu_ps (b + idx); - __m128 realPart = _mm_sub_ps (_mm_mul_ps (av, bv), _mm_mul_ps (aShuffled, bShuffled)); - __m128 imagPart = _mm_add_ps (_mm_mul_ps (av, bShuffled), _mm_mul_ps (aShuffled, bv)); + const __m128 aReal = _mm_shuffle_ps (av, av, _MM_SHUFFLE (2, 2, 0, 0)); + const __m128 aImag = _mm_shuffle_ps (av, av, _MM_SHUFFLE (3, 3, 1, 1)); + const __m128 bSwapped = _mm_shuffle_ps (bv, bv, _MM_SHUFFLE (2, 3, 0, 1)); + const __m128 realProducts = _mm_mul_ps (aReal, bv); + const __m128 imagProducts = _mm_mul_ps (aImag, bSwapped); - __m128 interleaved = _mm_unpacklo_ps (realPart, imagPart); + __m128 interleaved = _mm_add_ps (realProducts, _mm_mul_ps (imagProducts, signs)); if (accumulate) interleaved = _mm_add_ps (_mm_loadu_ps (y + idx), interleaved); diff --git a/tests/yup_dsp/yup_BiquadCascade.cpp b/tests/yup_dsp/yup_BiquadCascade.cpp index 86f393b5f..e9ae8bc02 100644 --- a/tests/yup_dsp/yup_BiquadCascade.cpp +++ b/tests/yup_dsp/yup_BiquadCascade.cpp @@ -398,11 +398,11 @@ TEST_F (BiquadCascadeFilterTests, StabilityCheck) auto coeffs = FilterDesigner::designRbjLowpass (5000.0, 50.0, sampleRate); cascadeFloat.setSectionCoefficients (0, coeffs); - // Process white noise-like signal + // Process deterministic white noise-like signal std::vector noiseInput (blockSize); - WhiteNoise noise; + WhiteNoise noise (12345); for (int i = 0; i < blockSize; ++i) - noiseInput[i] = noise.getNextSample(); + noiseInput[i] = 0.1f * noise.getNextSample(); cascadeFloat.processBlock (noiseInput.data(), outputData.data(), blockSize);