diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2b18444..3af0123 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,8 +18,8 @@ jobs: compiler: - { name: gcc-11, cc: gcc-11, cxx: g++-11 } - { name: gcc-12, cc: gcc-12, cxx: g++-12 } - - { name: clang-14, cc: clang-14, cxx: clang++-14 } - - { name: clang-15, cc: clang-15, cxx: clang++-15 } + - { name: clang-16, cc: clang-16, cxx: clang++-16 } + - { name: clang-17, cc: clang-17, cxx: clang++-17 } - { name: msvc, cc: cl, cxx: cl } exclude: # MSVC only on Windows @@ -32,16 +32,16 @@ jobs: compiler: { name: gcc-11, cc: gcc-11, cxx: g++-11 } - os: windows-latest compiler: { name: gcc-12, cc: gcc-12, cxx: g++-12 } - # Older Clang not on Windows + # Clang not on Windows (use MSVC) - os: windows-latest - compiler: { name: clang-14, cc: clang-14, cxx: clang++-14 } + compiler: { name: clang-16, cc: clang-16, cxx: clang++-16 } - os: windows-latest - compiler: { name: clang-15, cc: clang-15, cxx: clang++-15 } + compiler: { name: clang-17, cc: clang-17, cxx: clang++-17 } # macOS uses AppleClang, exclude Linux clangs - os: macos-latest - compiler: { name: clang-14, cc: clang-14, cxx: clang++-14 } + compiler: { name: clang-16, cc: clang-16, cxx: clang++-16 } - os: macos-latest - compiler: { name: clang-15, cc: clang-15, cxx: clang++-15 } + compiler: { name: clang-17, cc: clang-17, cxx: clang++-17 } # macOS doesn't have these GCC versions easily - os: macos-latest compiler: { name: gcc-11, cc: gcc-11, cxx: g++-11 } @@ -134,7 +134,9 @@ jobs: # On Windows, ensure PATH includes DLL directories export PATH="${{ github.workspace }}/build/${{ matrix.build_type }}:${{ github.workspace }}/build/tests/${{ matrix.build_type }}:${{ github.workspace }}/build:$PATH" fi - ctest -C ${{ matrix.build_type }} --output-on-failure --parallel + # Exclude timing and benchmark tests: speedup assertions require a quiet + # dedicated machine and serial execution. Run them locally with -j1 -L timing. 
+ ctest -C ${{ matrix.build_type }} --output-on-failure --parallel -LE "timing|benchmark" - name: Benchmark (Release only) if: matrix.build_type == 'Release' @@ -238,12 +240,16 @@ jobs: run: | cd build # Run tests sequentially for coverage to avoid profile data corruption - ctest --output-on-failure + # Exclude timing tests for the same reason as the main CI job. + ctest --output-on-failure -LE "timing|benchmark" - name: Generate coverage report run: | # Explicitly use gcov-11 for coverage collection - lcov --gcov-tool gcov-11 --directory build --capture --output-file coverage.info + # --ignore-errors gcov suppresses missing .gcno errors for CMakeTmp probe + # artifacts generated during configure-time SIMD detection; those files + # have no corresponding .gcno because CMakeTmp is cleaned up after cmake. + lcov --gcov-tool gcov-11 --directory build --capture --output-file coverage.info --ignore-errors gcov,mismatch lcov --remove coverage.info '/usr/*' '*/tests/*' '*/examples/*' --output-file coverage.info --ignore-errors unused lcov --list coverage.info diff --git a/CMakeLists.txt b/CMakeLists.txt index 3992ed7..efe6549 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,17 @@ endif() # Verbose messaging control option(LIBSTATS_VERBOSE_BUILD "Enable verbose build messages for debugging" OFF) +# Compiler selection: opt in to Homebrew LLVM on macOS. WARNING: enabling this requires ALL +# consumers (shared libraries, Python extensions, test executables) to link the same Homebrew +# libc++. If any consumer uses Apple libc++ instead, C++ exceptions and std::type_info records will +# not match across the boundary, causing silent catch failures. The default (OFF) uses system +# AppleClang with Apple libc++ and GCD for parallelism, which is ABI-safe for all consumers. Only +# set this to ON if you own the entire toolchain and have verified ABI consistency end-to-end. 
+option( + LIBSTATS_USE_HOMEBREW_LLVM + "Use Homebrew LLVM instead of system AppleClang on macOS (requires matching libc++ in all consumers)" + OFF) + # Threading system preference control option(LIBSTATS_FORCE_TBB "Force TBB usage even on platforms with native threading (e.g., GCD on macOS)" OFF) @@ -38,9 +49,11 @@ if(CMAKE_GENERATOR STREQUAL "Unix Makefiles" OR CMAKE_GENERATOR STREQUAL "MinGW endif() # Set parallel options for CMake build command (for use with cmake --build) -set(CMAKE_BUILD_PARALLEL_LEVEL - ${CPU_COUNT} - CACHE STRING "Number of parallel build jobs" FORCE) +if(NOT DEFINED CMAKE_BUILD_PARALLEL_LEVEL) + set(CMAKE_BUILD_PARALLEL_LEVEL + ${CPU_COUNT} + CACHE STRING "Number of parallel build jobs") +endif() if(LIBSTATS_VERBOSE_BUILD) message(STATUS "Set CMAKE_BUILD_PARALLEL_LEVEL=${CPU_COUNT} for cmake --build") endif() @@ -120,18 +133,24 @@ function(detect_homebrew_llvm) endif() endfunction() -# Apply Homebrew LLVM detection (macOS/Linux only) -if(APPLE OR (UNIX AND NOT WIN32)) +# Apply Homebrew LLVM detection (macOS only — Homebrew paths are macOS-specific) +if(APPLE) detect_homebrew_llvm() - if(USING_HOMEBREW_LLVM) - # Configure Homebrew LLVM + if(USING_HOMEBREW_LLVM AND LIBSTATS_USE_HOMEBREW_LLVM) + # Opt-in: use Homebrew LLVM. All consumers must link the same Homebrew libc++. set(CMAKE_C_COMPILER "${LLVM_ROOT}/bin/clang") set(CMAKE_CXX_COMPILER "${LLVM_ROOT}/bin/clang++") set(CMAKE_PREFIX_PATH "${LLVM_ROOT}") - message(STATUS "Using Homebrew LLVM from ${LLVM_ROOT}") + message(STATUS "Using Homebrew LLVM from ${LLVM_ROOT} (LIBSTATS_USE_HOMEBREW_LLVM=ON)") else() - # Fallback to system compiler + # Default: system AppleClang with Apple libc++ - ABI-safe for all consumers. 
+ if(USING_HOMEBREW_LLVM) + message( + STATUS + "Homebrew LLVM found but not used (LIBSTATS_USE_HOMEBREW_LLVM=OFF) - using system AppleClang for ABI safety" + ) + endif() find_program(CMAKE_C_COMPILER clang) find_program(CMAKE_CXX_COMPILER clang++) if(LIBSTATS_VERBOSE_BUILD) @@ -224,9 +243,9 @@ if(APPLE) # macOS specific configuration set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - if(USING_HOMEBREW_LLVM) - # Homebrew LLVM specific configuration - Force LLVM libc++ to get C++20 execution policies - # Note: rpath will be set per-target to avoid duplication warnings + if(USING_HOMEBREW_LLVM AND LIBSTATS_USE_HOMEBREW_LLVM) + # Opt-in Homebrew LLVM configuration: force LLVM libc++ headers for std::execution policies. + # Note: rpath will be set per-target to avoid duplication warnings. # CRITICAL: Include LLVM libc++ headers BEFORE system headers to get C++20 execution # policies @@ -241,8 +260,9 @@ if(APPLE) # Enable experimental PSTL support in LLVM libc++ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_LIBCPP_HAS_EXPERIMENTAL_PSTL=1") else() - # System compiler configuration - use system libc++ - message(STATUS "Using system libc++ with Apple Clang") + # Default: system AppleClang with Apple libc++. Parallelism uses GCD (already implemented in + # parallel_execution.h). ABI-safe for all consumers. 
+ message(STATUS "Using system libc++ with Apple Clang (GCD parallel path active)") endif() # Comprehensive Threading System Detection with Caching Cache results to avoid repeated @@ -274,7 +294,9 @@ if(APPLE) if(TBB_FOUND) include_directories(${TBB_INCLUDE_DIRS}) link_directories(${TBB_LIBRARY_DIRS}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_CFLAGS_OTHER}") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} ${TBB_CFLAGS_OTHER}" + PARENT_SCOPE) set(LIBSTATS_HAS_TBB TRUE) message( STATUS " ✓ TBB found via pkg-config - parallel execution policies enhanced") @@ -457,7 +479,9 @@ elseif(UNIX AND NOT APPLE) if(TBB_FOUND) include_directories(${TBB_INCLUDE_DIRS}) link_directories(${TBB_LIBRARY_DIRS}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_CFLAGS_OTHER}") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} ${TBB_CFLAGS_OTHER}" + PARENT_SCOPE) set(LIBSTATS_HAS_TBB TRUE CACHE BOOL "Intel TBB support available") @@ -690,15 +714,6 @@ elseif(WIN32) # Execute comprehensive threading detection for Windows detect_threading_systems_windows() - # Find GTest using find_package (no Homebrew or pkg-config on Windows) - find_package(GTest QUIET) - if(GTest_FOUND) - set(GTEST_FOUND TRUE) - message(STATUS "GTest found via find_package (Windows)") - else() - set(GTEST_FOUND FALSE) - message(STATUS "GTest not found - GTest-based tests will be skipped (Windows)") - endif() endif() # SIMD feature detection and compilation flags Use our comprehensive SIMD detection system @@ -706,9 +721,6 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/SIMDDetection.cmake") # Set options for SIMD detection behavior option(LIBSTATS_ENABLE_RUNTIME_CHECKS "Enable runtime CPU checks even when cross-compiling" OFF) -option(LIBSTATS_CONSERVATIVE_SIMD "Use conservative SIMD settings (disable newer instruction sets)" - OFF) - # Perform comprehensive SIMD detection detect_simd_features() @@ -744,7 +756,9 @@ function(detect_tbb_unified) if(TBB_FOUND) include_directories(${TBB_INCLUDE_DIRS}) 
link_directories(${TBB_LIBRARY_DIRS}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_CFLAGS_OTHER}") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} ${TBB_CFLAGS_OTHER}" + PARENT_SCOPE) set(LIBSTATS_HAS_TBB TRUE) if(LIBSTATS_VERBOSE_BUILD) message(STATUS " ✓ TBB found via pkg-config") @@ -945,11 +959,6 @@ set(LIBSTATS_MSVC_ENHANCED_WARNINGS /we4239 # Nonstandard extension used (can catch duplicate definitions) ) -# MSVC-specific linker flags for ODR detection -set(LIBSTATS_MSVC_ODR_LINKER_FLAGS /FORCE:MULTIPLE # Force link even with multiply-defined symbols - # (to catch ODR issues) -) - # Optimization levels for different build types set(LIBSTATS_OPT_NONE_UNIX -O0) set(LIBSTATS_OPT_LIGHT_UNIX -O1) @@ -1470,9 +1479,11 @@ endif() # Create SIMD interface target for modern CMake approach create_simd_interface_target() +# Apply per-source-file SIMD compile flags once (file-global properties). +apply_simd_source_flags() -# Configure SIMD compilation for all object libraries and final targets This ensures SIMD-specific -# compile flags are applied correctly to each component +# Link the SIMD interface target to every object library and final library so all TUs receive the +# LIBSTATS_HAS_* compile definitions. 
configure_simd_target(libstats_foundation_obj) configure_simd_target(libstats_core_utilities_obj) configure_simd_target(libstats_platform_obj) @@ -1501,60 +1512,60 @@ if(LIBSTATS_BUILD_TESTS) # Create tests directory in build folder file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests) - # Find GTest (Homebrew installation) First, try to use Homebrew's GTest installation directly - # Check both Intel and ARM64 paths - set(GTEST_ROOT_INTEL "/usr/local/opt/googletest") - set(GTEST_ROOT_ARM "/opt/homebrew/opt/googletest") - - # Determine which Homebrew path to use - if(EXISTS "${GTEST_ROOT_ARM}/lib/cmake/GTest") - set(GTEST_ROOT "${GTEST_ROOT_ARM}") - message(STATUS "Using ARM64 Homebrew GTest path") - elseif(EXISTS "${GTEST_ROOT_INTEL}/lib/cmake/GTest") - set(GTEST_ROOT "${GTEST_ROOT_INTEL}") - message(STATUS "Using Intel Homebrew GTest path") - else() - set(GTEST_ROOT "${GTEST_ROOT_ARM}") - message(STATUS "Using default ARM64 Homebrew GTest path (may not exist)") - endif() - - set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${GTEST_ROOT}") - - # Try find_package first with explicit path - find_package(GTest QUIET HINTS ${GTEST_ROOT}/lib/cmake/GTest) + # GTest detection — three-step: generic find_package, then Apple/Homebrew probe, then + # FetchContent as a self-contained fallback (matches libhmm test pattern). + # Step 1: plain find_package — covers vcpkg, system installs, and previously cached results. 
+ if(NOT GTest_FOUND) + find_package(GTest QUIET) + endif() if(GTest_FOUND OR TARGET GTest::gtest) set(GTEST_FOUND TRUE) message(STATUS "GTest found via find_package") - else() - # Fallback to pkg-config - find_package(PkgConfig QUIET) - if(PkgConfig_FOUND) - # Set PKG_CONFIG_PATH to include Homebrew's pkgconfig directory - set(ENV{PKG_CONFIG_PATH} "${GTEST_ROOT}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") - pkg_check_modules(GTEST QUIET gtest) - pkg_check_modules(GTEST_MAIN QUIET gtest_main) - if(GTEST_FOUND) - message(STATUS "GTest found via pkg-config") - endif() - endif() + endif() - # Manual fallback if both methods fail - if(NOT GTEST_FOUND) - # Check if the Homebrew installation exists manually - if(EXISTS "${GTEST_ROOT}/lib/libgtest.a" AND EXISTS - "${GTEST_ROOT}/include/gtest/gtest.h") + # Step 2: macOS only — probe architecture-appropriate Homebrew path. + if(NOT GTEST_FOUND AND APPLE) + if(EXISTS "/opt/homebrew/opt/googletest/lib/cmake/GTest") + set(GTEST_ROOT "/opt/homebrew/opt/googletest") + elseif(EXISTS "/usr/local/opt/googletest/lib/cmake/GTest") + set(GTEST_ROOT "/usr/local/opt/googletest") + endif() + if(DEFINED GTEST_ROOT) + find_package(GTest QUIET HINTS "${GTEST_ROOT}/lib/cmake/GTest") + if(GTest_FOUND OR TARGET GTest::gtest) + set(GTEST_FOUND TRUE) + message(STATUS "GTest found via Homebrew at ${GTEST_ROOT}") + elseif(EXISTS "${GTEST_ROOT}/lib/libgtest.a" AND EXISTS + "${GTEST_ROOT}/include/gtest/gtest.h") set(GTEST_FOUND TRUE) set(GTEST_INCLUDE_DIRS "${GTEST_ROOT}/include") set(GTEST_LIBRARIES "${GTEST_ROOT}/lib/libgtest.a") set(GTEST_MAIN_LIBRARIES "${GTEST_ROOT}/lib/libgtest_main.a") message(STATUS "GTest found manually at ${GTEST_ROOT}") - else() - message(STATUS "GTest not found - GTest-based tests will be skipped") endif() endif() endif() + # Step 3: FetchContent fallback — self-contained for CI and machines without a system GTest. 
+ if(NOT GTEST_FOUND + AND NOT TARGET GTest::gtest + AND NOT TARGET gtest) + message(STATUS "GTest not found locally - fetching via FetchContent") + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.17.0 + GIT_SHALLOW TRUE) + # Prevent GTest from overriding the project's CRT choice on Windows. + set(gtest_force_shared_crt + ON + CACHE BOOL "" FORCE) + FetchContent_MakeAvailable(googletest) + set(GTEST_FOUND TRUE) + endif() + # Common test configuration function - reduces code duplication across test types function(configure_common_test_settings TEST_NAME) # Set output directory to build/tests @@ -1887,8 +1898,12 @@ if(LIBSTATS_BUILD_TESTS) # missing targets. if(GTEST_FOUND) set_tests_properties( - test_gaussian_enhanced test_chi_squared_enhanced test_student_t_enhanced - test_beta_enhanced test_performance_dispatcher + test_math_comprehensive # vectorized math speedup assertions + test_gaussian_enhanced + test_chi_squared_enhanced + test_student_t_enhanced + test_beta_enhanced + test_performance_dispatcher test_system_capabilities # runs live SIMD/threading/bandwidth benchmarks PROPERTIES LABELS "timing") if(LIBSTATS_HAS_REQUIRES_EXPRESSIONS) diff --git a/cmake/DetectAVX512.cmake b/cmake/DetectAVX512.cmake deleted file mode 100644 index 5e99ea4..0000000 --- a/cmake/DetectAVX512.cmake +++ /dev/null @@ -1,203 +0,0 @@ -# DetectAVX512.cmake - Advanced AVX-512 detection and configuration This module provides -# comprehensive AVX-512 support detection for libstats - -# Options for AVX-512 control -option(LIBSTATS_ENABLE_AVX512 "Enable AVX-512 support if available" ON) -option(LIBSTATS_FORCE_AVX512 "Force AVX-512 compilation even without runtime detection" OFF) -option(LIBSTATS_TEST_AVX512_COMPILATION "Test AVX-512 compilation during configuration" ON) - -# Function to test AVX-512 compilation -function(test_avx512_compilation) - if(NOT LIBSTATS_TEST_AVX512_COMPILATION) - return() - endif() - 
- message(STATUS "Testing AVX-512 compilation support...") - - # Create test source - set(AVX512_TEST_SOURCE - " - #include - int main() { - __m512d a = _mm512_setzero_pd(); - __m512d b = _mm512_set1_pd(1.0); - __m512d c = _mm512_add_pd(a, b); - __m512d d = _mm512_fmadd_pd(a, b, c); - double result[8]; - _mm512_storeu_pd(result, d); - return (int)result[0]; - } - ") - - # Test compilation with different flag combinations - set(AVX512_FLAGS_TO_TEST - "-mavx512f" "-mavx512f -mavx512dq" "-mavx512f -mavx512dq -mavx512bw -mavx512vl" - "-march=skylake-avx512" "-march=native") - - foreach(flags ${AVX512_FLAGS_TO_TEST}) - string(REPLACE " " "_" flag_name "${flags}") - string(REPLACE "-" "_" flag_name "${flag_name}") - - try_compile( - AVX512_COMPILE_${flag_name} ${CMAKE_BINARY_DIR}/cmake_temp - SOURCES ${CMAKE_BINARY_DIR}/cmake_temp/avx512_test.cpp - CMAKE_FLAGS "-DCMAKE_CXX_FLAGS=${flags}") - - if(AVX512_COMPILE_${flag_name}) - message(STATUS " ✅ AVX-512 compiles with flags: ${flags}") - set(LIBSTATS_AVX512_COMPILE_FLAGS - "${flags}" - PARENT_SCOPE) - set(LIBSTATS_AVX512_COMPILATION_SUPPORTED - TRUE - PARENT_SCOPE) - break() - else() - message(STATUS " ❌ AVX-512 failed with flags: ${flags}") - endif() - endforeach() - - # Write test source to file - file(WRITE ${CMAKE_BINARY_DIR}/cmake_temp/avx512_test.cpp "${AVX512_TEST_SOURCE}") -endfunction() - -# Function to detect AVX-512 runtime support -function(detect_avx512_runtime) - message(STATUS "Checking for AVX-512 runtime support...") - - # Create runtime detection program - set(AVX512_RUNTIME_TEST - " - #include - #include - - #ifdef _WIN32 - #include - #endif - - bool check_avx512_support() { - unsigned int eax, ebx, ecx, edx; - - // Check if CPUID is supported - #ifdef _WIN32 - int cpuinfo[4]; - __cpuid(cpuinfo, 0); - if (cpuinfo[0] < 7) return false; - - __cpuidex(cpuinfo, 7, 0); - return (cpuinfo[1] & (1 << 16)) != 0; // AVX-512F - #else - __asm__ (\"cpuid\" : \"=a\"(eax), \"=b\"(ebx), \"=c\"(ecx), \"=d\"(edx) : 
\"a\"(0)); - if (eax < 7) return false; - - __asm__ (\"cpuid\" : \"=a\"(eax), \"=b\"(ebx), \"=c\"(ecx), \"=d\"(edx) : \"a\"(7), \"c\"(0)); - return (ebx & (1 << 16)) != 0; // AVX-512F - #endif - } - - int main() { - if (check_avx512_support()) { - std::cout << \"AVX-512 runtime support: YES\" << std::endl; - return 0; - } else { - std::cout << \"AVX-512 runtime support: NO\" << std::endl; - return 1; - } - } - ") - - # Write and try to run the test - file(WRITE ${CMAKE_BINARY_DIR}/cmake_temp/avx512_runtime_test.cpp "${AVX512_RUNTIME_TEST}") - - try_compile( - AVX512_RUNTIME_COMPILE ${CMAKE_BINARY_DIR}/cmake_temp - SOURCES ${CMAKE_BINARY_DIR}/cmake_temp/avx512_runtime_test.cpp - CMAKE_FLAGS "-DCMAKE_CXX_FLAGS=${LIBSTATS_AVX512_COMPILE_FLAGS}" - COPY_FILE ${CMAKE_BINARY_DIR}/cmake_temp/avx512_runtime_test) - - if(AVX512_RUNTIME_COMPILE) - execute_process( - COMMAND ${CMAKE_BINARY_DIR}/cmake_temp/avx512_runtime_test - RESULT_VARIABLE AVX512_RUNTIME_RESULT - OUTPUT_VARIABLE AVX512_RUNTIME_OUTPUT - ERROR_QUIET) - - if(AVX512_RUNTIME_RESULT EQUAL 0) - message(STATUS " ✅ ${AVX512_RUNTIME_OUTPUT}") - set(LIBSTATS_AVX512_RUNTIME_SUPPORTED - TRUE - PARENT_SCOPE) - else() - message(STATUS " ❌ ${AVX512_RUNTIME_OUTPUT}") - set(LIBSTATS_AVX512_RUNTIME_SUPPORTED - FALSE - PARENT_SCOPE) - endif() - else() - message(STATUS " ❌ Failed to compile AVX-512 runtime test") - set(LIBSTATS_AVX512_RUNTIME_SUPPORTED - FALSE - PARENT_SCOPE) - endif() -endfunction() - -# Main AVX-512 detection logic -if(LIBSTATS_ENABLE_AVX512 OR LIBSTATS_FORCE_AVX512) - message(STATUS "=== AVX-512 Detection ===") - - # Ensure temp directory exists - file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/cmake_temp) - - # Test compilation support - test_avx512_compilation() - - if(LIBSTATS_AVX512_COMPILATION_SUPPORTED OR LIBSTATS_FORCE_AVX512) - # Test runtime support (only if we can compile) - if(NOT CMAKE_CROSSCOMPILING AND NOT LIBSTATS_FORCE_AVX512) - detect_avx512_runtime() - else() - message(STATUS "Skipping AVX-512 
runtime detection (cross-compiling or forced)") - set(LIBSTATS_AVX512_RUNTIME_SUPPORTED TRUE) - endif() - - # Set final configuration - if(LIBSTATS_AVX512_RUNTIME_SUPPORTED OR LIBSTATS_FORCE_AVX512) - set(LIBSTATS_HAS_AVX512 - TRUE - CACHE BOOL "AVX-512 support available" FORCE) - message(STATUS "✅ AVX-512 support enabled") - - if(LIBSTATS_AVX512_COMPILE_FLAGS) - message(STATUS "AVX-512 compile flags: ${LIBSTATS_AVX512_COMPILE_FLAGS}") - endif() - else() - set(LIBSTATS_HAS_AVX512 - FALSE - CACHE BOOL "AVX-512 support not available" FORCE) - message(STATUS "❌ AVX-512 runtime not supported") - endif() - else() - set(LIBSTATS_HAS_AVX512 - FALSE - CACHE BOOL "AVX-512 compilation not supported" FORCE) - message(STATUS "❌ AVX-512 compilation not supported") - endif() -else() - set(LIBSTATS_HAS_AVX512 - FALSE - CACHE BOOL "AVX-512 support disabled" FORCE) - message(STATUS "AVX-512 support disabled by configuration") -endif() - -# Export results for use in main CMakeLists.txt -if(LIBSTATS_HAS_AVX512) - add_compile_definitions(LIBSTATS_HAS_AVX512) - if(LIBSTATS_AVX512_COMPILE_FLAGS) - # These flags will be applied to AVX-512 specific files - set(CMAKE_CXX_FLAGS_AVX512 - "${LIBSTATS_AVX512_COMPILE_FLAGS}" - CACHE STRING "Flags for AVX-512 compilation" FORCE) - endif() -endif() - -message(STATUS "========================") diff --git a/cmake/SIMDDetection.cmake b/cmake/SIMDDetection.cmake index 0b48389..9c0b71c 100644 --- a/cmake/SIMDDetection.cmake +++ b/cmake/SIMDDetection.cmake @@ -506,11 +506,21 @@ bool test_avx2() { "SIMDDetection: check_cxx_compiler_flag('/arch:AVX512') result = ${COMPILER_SUPPORTS_AVX512}" ) if(COMPILER_SUPPORTS_AVX512) - test_runtime_cpu_feature( - "avx512" - "#include \\nbool test_avx512() {\\n __m512d a = _mm512_set1_pd(1.0);\\n __m512d b = _mm512_set1_pd(2.0);\\n __m512d c = _mm512_add_pd(a, b);\\n double result[8];\\n _mm512_storeu_pd(result, c);\\n for(int i=0; i<8; ++i) if(result[i] != 3.0) return false;\\n return true;\\n}" - 
RUNTIME_SUPPORTS_AVX512 - "/arch:AVX512") + # check_cxx_compiler_flag only tests that MSVC accepts the flag syntax, not + # that the build machine's CPU can execute the resulting instructions. + # Use check_cxx_source_runs (compile + execute) to verify actual CPU support. + include(CheckCXXSourceRuns) + set(_avx512_saved_req_flags "${CMAKE_REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_FLAGS "/arch:AVX512") + check_cxx_source_runs( + "#include <immintrin.h> +int main() { + __m512d x = _mm512_setzero_pd(); + (void)x; + return 0; +}" + RUNTIME_SUPPORTS_AVX512) + set(CMAKE_REQUIRED_FLAGS "${_avx512_saved_req_flags}") if(RUNTIME_SUPPORTS_AVX512) set(LIBSTATS_HAS_AVX512 TRUE @@ -531,9 +541,9 @@ bool test_avx2() { set(LIBSTATS_SIMD_DEFINITIONS "${_definitions}" CACHE INTERNAL "List of SIMD compile definitions" FORCE) - message(STATUS "SIMD: AVX-512 enabled (compiler + runtime)") + message(STATUS "SIMD: AVX-512 enabled (compiler + runtime, CPU verified)") else() - message(STATUS "SIMD: AVX-512 disabled (runtime check failed)") + message(STATUS "SIMD: AVX-512 disabled (CPU does not support it)") else() message(STATUS "SIMD: AVX-512 disabled (compiler not supported)") @@ -543,7 +553,16 @@ bool test_avx2() { if(COMPILER_SUPPORTS_AVX512) test_runtime_cpu_feature( "avx512" - "#include <immintrin.h>\\nbool test_avx512() {\\n __m512d a = _mm512_set1_pd(1.0);\\n __m512d b = _mm512_set1_pd(2.0);\\n __m512d c = _mm512_add_pd(a, b);\\n double result[8];\\n _mm512_storeu_pd(result, c);\\n for(int i=0; i<8; ++i) if(result[i] != 3.0) return false;\\n return true;\\n}" + "#include <immintrin.h> +bool test_avx512() { + __m512d a = _mm512_set1_pd(1.0); + __m512d b = _mm512_set1_pd(2.0); + __m512d c = _mm512_add_pd(a, b); + double result[8]; + _mm512_storeu_pd(result, c); + for(int i=0; i<8; ++i) if(result[i] != 3.0) return false; + return true; +}" RUNTIME_SUPPORTS_AVX512 "-mavx512f") if(RUNTIME_SUPPORTS_AVX512) @@ -722,8 +741,9 @@ function(create_simd_interface_target) message(STATUS "SIMD interface target created with 
definitions: ${_definitions}") endfunction() -# Function to configure SIMD compilation for a target using modern CMake approach -function(configure_simd_target TARGET_NAME) +# Apply per-source-file SIMD compile flags. These are file-global properties and need to be set +# only once; call this function once after create_simd_interface_target(). +function(apply_simd_source_flags) get_property( _sse2 CACHE LIBSTATS_HAS_SSE2 @@ -745,12 +765,6 @@ function(configure_simd_target TARGET_NAME) CACHE LIBSTATS_HAS_NEON PROPERTY VALUE) - # Link to SIMD interface target for definitions (modern CMake approach) - if(TARGET libstats_simd_interface) - target_link_libraries(${TARGET_NAME} PRIVATE libstats::simd) - endif() - - # Configure source-specific SIMD compilation flags if(_sse2) if(MSVC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND WIN32)) set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/simd_sse2.cpp" @@ -801,3 +815,11 @@ function(configure_simd_target TARGET_NAME) endif() endif() endfunction() + +# Link the SIMD interface target (with its compile definitions) to a specific library or object +# target. Call this for every target that compiles any source in the project. +function(configure_simd_target TARGET_NAME) + if(TARGET libstats_simd_interface) + target_link_libraries(${TARGET_NAME} PRIVATE libstats::simd) + endif() +endfunction() diff --git a/include/common/utility_common.h b/include/common/utility_common.h index 170a560..c4e1258 100644 --- a/include/common/utility_common.h +++ b/include/common/utility_common.h @@ -39,6 +39,19 @@ #else #define LIBSTATS_NEEDS_CATALINA_CONCEPT_SYNTAX_FALLBACK 0 #endif + +// C++20 branch-prediction attributes: [[likely]] and [[unlikely]]. +// AppleClang 12 (Catalina) does not implement these attributes and emits +// -Wunknown-attributes. Use __has_cpp_attribute for a standard, portable check +// rather than a compiler-version guard so that any future compiler that adds +// support gets the attributes automatically. 
+#if __has_cpp_attribute(likely) + #define LIBSTATS_LIKELY [[likely]] + #define LIBSTATS_UNLIKELY [[unlikely]] +#else + #define LIBSTATS_LIKELY + #define LIBSTATS_UNLIKELY +#endif #include #include #include // Exception types diff --git a/include/core/math_utils.h b/include/core/math_utils.h index 46801cf..e76007e 100644 --- a/include/core/math_utils.h +++ b/include/core/math_utils.h @@ -465,13 +465,16 @@ namespace detail { * @return log(1 + exp(x)) */ [[nodiscard]] inline double log1pexp(double x) noexcept { - if (x > detail::LOG1PEXP_LARGE_THRESHOLD) [[likely]] { - return x; // exp(x) dominates - } else if (x > detail::LOG1PEXP_SMALL_THRESHOLD) { + if (x > detail::LOG1PEXP_LARGE_THRESHOLD) + LIBSTATS_LIKELY { + return x; // exp(x) dominates + } + else if (x > detail::LOG1PEXP_SMALL_THRESHOLD) { return std::log1p(std::exp(x)); - } else [[unlikely]] { - return std::exp(x); // 1 + exp(x) ≈ 1 - } + } else + LIBSTATS_UNLIKELY { + return std::exp(x); // 1 + exp(x) ≈ 1 + } } /** @@ -480,9 +483,11 @@ namespace detail { * @return log(exp(x) - 1) */ [[nodiscard]] inline double logexpm1(double x) noexcept { - if (x > detail::LOG1PEXP_LARGE_THRESHOLD) [[likely]] { - return x; // exp(x) dominates - } else { + if (x > detail::LOG1PEXP_LARGE_THRESHOLD) + LIBSTATS_LIKELY { + return x; // exp(x) dominates + } + else { return std::log(std::expm1(x)); } } @@ -494,9 +499,11 @@ namespace detail { * @return log(x + y) */ [[nodiscard]] inline double log_sum_exp(double log_x, double log_y) noexcept { - if (log_x > log_y) [[likely]] { - return log_x + std::log1p(std::exp(log_y - log_x)); - } else { + if (log_x > log_y) + LIBSTATS_LIKELY { + return log_x + std::log1p(std::exp(log_y - log_x)); + } + else { return log_y + std::log1p(std::exp(log_x - log_y)); } } @@ -522,9 +529,10 @@ LIBSTATS_CONSTRAINED_NODISCARD constexpr bool is_safe_float(T x) noexcept { template requires FloatingPoint LIBSTATS_CONSTRAINED_NODISCARD constexpr T clamp_safe(T x, T min_val, T max_val) noexcept { - if 
(std::isnan(x)) [[unlikely]] { - return min_val; - } + if (std::isnan(x)) + LIBSTATS_UNLIKELY { + return min_val; + } return std::clamp(x, min_val, max_val); } diff --git a/src/cpu_detection.cpp b/src/cpu_detection.cpp index 4fe71e8..c07e7eb 100644 --- a/src/cpu_detection.cpp +++ b/src/cpu_detection.cpp @@ -1,14 +1,14 @@ // CRITICAL: Ensure CPU detection code uses NO advanced SIMD instructions // This file detects CPU features and must not use the features it's detecting! // Use compiler-specific approaches to disable SIMD in CPU detection code -#if (defined(__clang__) && \ +#if (defined(__clang__) && \ (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))) // Clang: use function attributes to disable SIMD per function #define CPU_DETECTION_NO_SIMD \ __attribute__(( \ target("no-avx512f,no-avx512cd,no-avx512bw,no-avx512dq,no-avx512vl,no-avx2,no-avx,no-" \ "sse4.2,no-sse4.1,no-ssse3,no-sse3"))) -#elif (defined(__GNUC__) && \ +#elif (defined(__GNUC__) && \ (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))) // GCC: use pragma to disable SIMD globally for this file #pragma GCC push_options @@ -451,16 +451,18 @@ Features detect_x86_features() { features.avx512f = (ebx & (1 << 16)) != 0; } - // Get brand string if available + // Get brand string if available. Each CPUID call returns 4 × 4-byte registers + // (eax, ebx, ecx, edx) = 16 bytes of brand string per call, three calls total. 
safe_cpuid(0x80000000, 0, eax, ebx, ecx, edx); if (eax >= 0x80000004) { + uint32_t regs[4]; char brand[49] = {0}; - safe_cpuid(0x80000002, 0, eax, ebx, ecx, edx); - memcpy(brand, &eax, 16); - safe_cpuid(0x80000003, 0, eax, ebx, ecx, edx); - memcpy(brand + 16, &eax, 16); - safe_cpuid(0x80000004, 0, eax, ebx, ecx, edx); - memcpy(brand + 32, &eax, 16); + safe_cpuid(0x80000002, 0, regs[0], regs[1], regs[2], regs[3]); + memcpy(brand, regs, 16); + safe_cpuid(0x80000003, 0, regs[0], regs[1], regs[2], regs[3]); + memcpy(brand + 16, regs, 16); + safe_cpuid(0x80000004, 0, regs[0], regs[1], regs[2], regs[3]); + memcpy(brand + 32, regs, 16); features.brand = brand; } @@ -1021,7 +1023,7 @@ bool is_modern_intel() { } // namespace stats // Restore original compiler SIMD settings (only needed for GCC) -#if (defined(__GNUC__) && !defined(__clang__) && \ +#if (defined(__GNUC__) && !defined(__clang__) && \ (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))) #pragma GCC pop_options #endif diff --git a/src/discrete.cpp b/src/discrete.cpp index b8002fa..f8df0eb 100644 --- a/src/discrete.cpp +++ b/src/discrete.cpp @@ -2266,11 +2266,6 @@ void DiscreteDistribution::getCumulativeProbability(std::span valu void DiscreteDistribution::getProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const DiscreteDistribution& dist, double value) { return dist.getProbability(value); }, @@ -2418,11 +2413,6 @@ void DiscreteDistribution::getProbabilityWithStrategy(std::span va void DiscreteDistribution::getLogProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not 
yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const DiscreteDistribution& dist, double value) { @@ -2588,11 +2578,6 @@ void DiscreteDistribution::getLogProbabilityWithStrategy(std::span void DiscreteDistribution::getCumulativeProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const DiscreteDistribution& dist, double value) { diff --git a/src/exponential.cpp b/src/exponential.cpp index 9de8664..81b9405 100644 --- a/src/exponential.cpp +++ b/src/exponential.cpp @@ -1843,11 +1843,6 @@ void ExponentialDistribution::getCumulativeProbability(std::span v void ExponentialDistribution::getProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const ExponentialDistribution& dist, double value) { @@ -2004,11 +1999,6 @@ void ExponentialDistribution::getProbabilityWithStrategy(std::span void ExponentialDistribution::getLogProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, 
values, results, strategy, [](const ExponentialDistribution& dist, double value) { @@ -2165,11 +2155,6 @@ void ExponentialDistribution::getLogProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const ExponentialDistribution& dist, double value) { diff --git a/src/gamma.cpp b/src/gamma.cpp index ade644f..ec6b749 100644 --- a/src/gamma.cpp +++ b/src/gamma.cpp @@ -1959,11 +1959,6 @@ void GammaDistribution::getCumulativeProbability(std::span values, void GammaDistribution::getProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const GammaDistribution& dist, double value) { return dist.getProbability(value); }, @@ -2123,11 +2118,6 @@ void GammaDistribution::getProbabilityWithStrategy(std::span value void GammaDistribution::getLogProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const GammaDistribution& dist, double value) { return dist.getLogProbability(value); }, @@ -2287,11 +2277,6 @@ void GammaDistribution::getLogProbabilityWithStrategy(std::span va void 
GammaDistribution::getCumulativeProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const GammaDistribution& dist, double value) { diff --git a/src/libstats_init.cpp b/src/libstats_init.cpp index d2ad085..009391c 100644 --- a/src/libstats_init.cpp +++ b/src/libstats_init.cpp @@ -3,25 +3,27 @@ #include "libstats/platform/simd_policy.h" #include "libstats/platform/thread_pool.h" +#include #include namespace stats { void initialize_performance_systems() { - // Thread-safe one-time initialization using static local variable - static bool initialized = false; + // Thread-safe one-time initialization using double-checked locking. + // The atomic load on the fast path avoids a data race under the C++ memory model. + static std::atomic initialized{false}; static std::mutex init_mutex; // Fast path: if already initialized, return immediately - if (initialized) { + if (initialized.load(std::memory_order_acquire)) { return; } // Slow path: acquire mutex and initialize std::lock_guard lock(init_mutex); - // Double-check pattern: another thread might have initialized while we waited - if (initialized) { + // Double-check: another thread might have initialized while we waited + if (initialized.load(std::memory_order_acquire)) { return; } @@ -48,7 +50,7 @@ void initialize_performance_systems() { [[maybe_unused]] auto optimal_threads = ThreadPool::getOptimalThreadCount(); // Mark as initialized - initialized = true; + initialized.store(true, std::memory_order_release); } catch (...) 
{ // If initialization fails, don't mark as initialized diff --git a/src/performance_history.cpp b/src/performance_history.cpp index d92cd0e..79cf127 100644 --- a/src/performance_history.cpp +++ b/src/performance_history.cpp @@ -70,8 +70,8 @@ PerformanceHistory::StrategyRecommendation PerformanceHistory::getBestStrategy( std::unique_lock lock(data_mutex_); // Check all available strategies - for (auto strategy : {Strategy::SCALAR, Strategy::VECTORIZED, Strategy::PARALLEL, - Strategy::WORK_STEALING, Strategy::WORK_STEALING}) { + for (auto strategy : + {Strategy::SCALAR, Strategy::VECTORIZED, Strategy::PARALLEL, Strategy::WORK_STEALING}) { std::string key = generateKey(strategy, distribution_type, batch_category); auto it = performance_data_.find(key); @@ -130,12 +130,10 @@ std::optional> PerformanceHistory::learnOpti if (last_underscore != std::string::npos) { std::size_t batch_category = std::stoull(key.substr(last_underscore + 1)); - // Determine strategy from key - improved parsing + // Determine strategy from key Strategy strategy = Strategy::SCALAR; if (key.find("WORK_STEALING") != std::string::npos) strategy = Strategy::WORK_STEALING; - else if (key.find("WORK_STEALING") != std::string::npos) - strategy = Strategy::WORK_STEALING; else if (key.find("PARALLEL") != std::string::npos) strategy = Strategy::PARALLEL; else if (key.find("VECTORIZED") != std::string::npos) diff --git a/src/uniform.cpp b/src/uniform.cpp index fca8b77..51efa1a 100644 --- a/src/uniform.cpp +++ b/src/uniform.cpp @@ -1826,11 +1826,6 @@ void UniformDistribution::getCumulativeProbability(std::span value void UniformDistribution::getProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, 
strategy, [](const UniformDistribution& dist, double value) { return dist.getProbability(value); }, @@ -1960,11 +1955,6 @@ void UniformDistribution::getProbabilityWithStrategy(std::span val void UniformDistribution::getLogProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const UniformDistribution& dist, double value) { return dist.getLogProbability(value); }, @@ -2112,11 +2102,6 @@ void UniformDistribution::getLogProbabilityWithStrategy(std::span void UniformDistribution::getCumulativeProbabilityWithStrategy(std::span values, std::span results, detail::Strategy strategy) const { - // GPU acceleration fallback - GPU implementation not yet available, use optimal CPU strategy - if (strategy == detail::Strategy::WORK_STEALING) { - strategy = detail::Strategy::WORK_STEALING; - } - detail::DispatchUtils::executeWithStrategy( *this, values, results, strategy, [](const UniformDistribution& dist, double value) { diff --git a/tests/test_simd_policy.cpp b/tests/test_simd_policy.cpp index 7a132d8..00703e6 100644 --- a/tests/test_simd_policy.cpp +++ b/tests/test_simd_policy.cpp @@ -234,7 +234,7 @@ void test_simd_level_detection() { // Test 2: Level consistency with runtime CPU detection { - SIMDPolicy::Level policy_level = SIMDPolicy::getBestLevel(); + [[maybe_unused]] SIMDPolicy::Level policy_level = SIMDPolicy::getBestLevel(); // Compare with direct CPU detection bool cpu_supports_avx512 = stats::arch::supports_avx512();