diff --git a/.env b/.env index 215aa49109b4..1231d0788739 100644 --- a/.env +++ b/.env @@ -62,7 +62,7 @@ HDFS=3.2.1 JDK=11 KARTOTHEK=latest # LLVM 12 and GCC 11 reports -Wmismatched-new-delete. -LLVM=14 +LLVM=21 MAVEN=3.8.7 NODE=18 NUMBA=latest diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 93bc723cd430..46ea95f8627d 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -136,7 +136,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: ${{ matrix.image }}-${{ hashFiles('cpp/**') }} @@ -191,7 +191,7 @@ jobs: macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} C++ - runs-on: macos-${{ matrix.macos-version }} + runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 75 strategy: @@ -199,9 +199,11 @@ jobs: matrix: include: - architecture: AMD64 - macos-version: "13" + macos-version: "14" + runs-on: macos-14-large - architecture: ARM64 macos-version: "14" + runs-on: macos-14 env: ARROW_AZURE: ON ARROW_BUILD_TESTS: ON @@ -259,7 +261,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: cpp-ccache-macos-${{ matrix.macos-version }}-${{ hashFiles('cpp/**') }} @@ -351,7 +353,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: cpp-ccache-windows-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} @@ -441,7 +443,7 @@ jobs: shell: msys2 {0} run: 
ci/scripts/msys2_setup.sh cpp - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ccache key: cpp-ccache-${{ matrix.msystem_lower}}-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index d2436fe3c452..5d9ad9bab0a0 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -53,7 +53,7 @@ jobs: python -m pip install pre-commit pre-commit run --show-diff-on-failure --color=always - name: Cache pre-commit - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ~/.cache/pre-commit key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1219f7526f9f..6fc6fa28fdb2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -46,7 +46,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: debian-docs-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 7d540b7cecdc..a13e29ce0dd1 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -53,7 +53,7 @@ jobs: with: fetch-depth: 0 - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: conda-docs-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index af9a98ed437f..ea2dfe2c5710 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -90,7 +90,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: 
actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: conda-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 6c0cf0991168..462538cbcdfd 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -72,7 +72,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: maven-${{ hashFiles('java/**') }} diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 56aa1d099288..a59d87dbf5ab 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -66,7 +66,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} @@ -108,7 +108,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: maven-${{ hashFiles('java/**') }} diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index a51ad867aa70..2619b85d27e2 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -95,7 +95,7 @@ jobs: with: fetch-depth: 0 - name: Jest Cache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: js/.jest-cache key: js-jest-cache-${{ runner.os }}-${{ hashFiles('js/src/**/*.ts', 'js/test/**/*.ts', 'js/yarn.lock') }} @@ -126,7 +126,7 @@ jobs: with: fetch-depth: 0 - name: Jest Cache - 
uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: js/.jest-cache key: js-jest-cache-${{ runner.os }}-${{ hashFiles('js/src/**/*.ts', 'js/test/**/*.ts', 'js/yarn.lock') }} diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 7d217b07ad7d..e5d33be2e7e4 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -81,7 +81,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: matlab-ccache-ubuntu-${{ hashFiles('cpp/**', 'matlab/**') }} @@ -99,15 +99,17 @@ jobs: strict: true macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} MATLAB - runs-on: macos-${{ matrix.macos-version }} + runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} strategy: matrix: include: - architecture: AMD64 - macos-version: "13" + macos-version: "14" + runs-on: macos-14-large - architecture: ARM64 macos-version: "14" + runs-on: macos-14 steps: - name: Check out repository uses: actions/checkout@v4 @@ -129,7 +131,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: matlab-ccache-macos-${{ hashFiles('cpp/**', 'matlab/**') }} @@ -172,7 +174,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: | ${{ steps.ccache-info.outputs.cache-dir }} diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 
84c8a6553b00..b513c0c17a06 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -103,7 +103,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} @@ -135,7 +135,7 @@ jobs: macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 - runs-on: macos-${{ matrix.macos-version }} + runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 strategy: @@ -143,9 +143,11 @@ jobs: matrix: include: - architecture: AMD64 - macos-version: "13" + macos-version: "14" + runs-on: macos-14-large - architecture: ARM64 macos-version: "14" + runs-on: macos-14 env: ARROW_HOME: /tmp/local ARROW_AZURE: ON @@ -200,7 +202,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: python-ccache-macos-${{ matrix.macos-version }}-${{ hashFiles('cpp/**', 'python/**') }} diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 1897f332f750..f9cf790acd7a 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -138,7 +138,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker # As this key is identical on both matrix builds only one will be able to successfully cache, @@ -268,7 +268,7 @@ jobs: ci/scripts/ccache_setup.sh echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> $GITHUB_ENV - name: Cache ccache - uses: actions/cache@v4 + 
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ccache key: r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-${{ github.run_id }} diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 9817e41d3b61..33db17f0db0f 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -86,7 +86,7 @@ jobs: exit 1 fi - name: Cache Repo - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: repo key: r-nightly-${{ github.run_id }} diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 13da7e62ee0c..d1d56379af63 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -79,7 +79,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} @@ -168,7 +168,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: ruby-ccache-macos-${{ hashFiles('cpp/**') }} @@ -251,7 +251,7 @@ jobs: run: | ridk exec bash ci\scripts\msys2_setup.sh ruby - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ccache key: ruby-ccache-ucrt${{ matrix.mingw-n-bits }}-${{ hashFiles('cpp/**') }} @@ -275,7 +275,7 @@ jobs: Write-Output "gem-dir=$(ridk exec gem env gemdir)" | ` Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append - name: Cache RubyGems - uses: actions/cache@v4 + uses: 
actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.rubygems-info.outputs.gem-dir }} key: ruby-rubygems-ucrt${{ matrix.mingw-n-bits }}-${{ hashFiles('**/Gemfile', 'ruby/*/*.gemspec') }} @@ -384,7 +384,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: glib-ccache-msvc-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 4ecc029bdd3c..7fcca77b2951 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -63,6 +63,56 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE +# Determine vcpkg triplet based on architecture +vcpkg_arch=$(arch) +case ${vcpkg_arch} in + arm64) + vcpkg_triplet="arm64-osx" + ;; + i386|x86_64) + vcpkg_triplet="x64-osx" + ;; + *) + vcpkg_triplet="arm64-osx" + ;; +esac + +# Set LLVM_DIR to point to vcpkg-installed LLVM if VCPKG_ROOT_LOCAL is set +llvm_dir_arg="" +gandiva_cxx_flags="" +osx_sysroot_arg="" +re2_source_arg="-Dre2_SOURCE=BUNDLED" +if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then + vcpkg_installed="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}" + llvm_cmake_dir="${vcpkg_installed}/share/llvm" + if [ -d "${llvm_cmake_dir}" ]; then + echo "=== Found vcpkg LLVM at ${llvm_cmake_dir} ===" + llvm_dir_arg="-DLLVM_DIR=${llvm_cmake_dir}" + + # vcpkg's clang needs to know where to find system headers + # Arrow's GandivaAddBitcode.cmake uses CMAKE_OSX_SYSROOT to set SDKROOT env var + sdk_path="$(xcrun --show-sdk-path)" + if [ -d "${sdk_path}" ]; then + osx_sysroot_arg="-DCMAKE_OSX_SYSROOT=${sdk_path}" + fi + + # Also pass the C++ standard library include path via 
ARROW_GANDIVA_PC_CXX_FLAGS + xcode_path="$(xcode-select -p)" + cxx_include_path="${xcode_path}/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1" + if [ -d "${cxx_include_path}" ]; then + gandiva_cxx_flags="-DARROW_GANDIVA_PC_CXX_FLAGS=-stdlib=libc++;-isystem;${cxx_include_path}" + fi + + # Use vcpkg's RE2 since it's installed as a dependency of LLVM + # This ensures ABI compatibility - vcpkg's RE2 uses std::string_view API + # which matches what vcpkg's LLVM and Abseil expect + re2_cmake_dir="${vcpkg_installed}/share/re2" + if [ -d "${re2_cmake_dir}" ]; then + re2_source_arg="-Dre2_ROOT=${vcpkg_installed}" + fi + fi +fi + mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" @@ -81,14 +131,19 @@ cmake \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_S3=${ARROW_S3} \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ + -DAWSSDK_SOURCE=BUNDLED \ + -DBoost_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_PREFIX=${install_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DGTest_SOURCE=BUNDLED \ + ${llvm_dir_arg} \ + ${osx_sysroot_arg} \ + ${gandiva_cxx_flags} \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ - -Dre2_SOURCE=BUNDLED \ + ${re2_source_arg} \ -GNinja \ ${arrow_dir}/cpp cmake --build . 
--target install diff --git a/ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch b/ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch new file mode 100644 index 000000000000..6bbe1af08f89 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch @@ -0,0 +1,13 @@ + openmp/tools/Modules/CMakeLists.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/openmp/tools/Modules/CMakeLists.txt b/openmp/tools/Modules/CMakeLists.txt +index 22d818eea72d..75aacc4468d4 100644 +--- a/openmp/tools/Modules/CMakeLists.txt ++++ b/openmp/tools/Modules/CMakeLists.txt +@@ -12,4 +12,4 @@ + + + install(FILES "FindOpenMPTarget.cmake" +- DESTINATION "${OPENMP_INSTALL_LIBDIR}/cmake/openmp") ++ DESTINATION "share/openmp") diff --git a/ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch b/ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch new file mode 100644 index 000000000000..bf46382567a3 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch @@ -0,0 +1,205 @@ + bolt/cmake/modules/AddBOLT.cmake | 2 +- + clang-tools-extra/clang-tidy/tool/CMakeLists.txt | 2 +- + clang-tools-extra/modularize/CMakeLists.txt | 2 +- + clang/cmake/modules/AddClang.cmake | 4 ++-- + clang/tools/c-index-test/CMakeLists.txt | 2 +- + clang/tools/clang-format/CMakeLists.txt | 4 ++-- + clang/tools/scan-build-py/CMakeLists.txt | 4 ++-- + clang/tools/scan-build/CMakeLists.txt | 2 +- + clang/tools/scan-view/CMakeLists.txt | 2 +- + flang/cmake/modules/AddFlang.cmake | 2 +- + flang/tools/flang-driver/CMakeLists.txt | 2 +- + lld/cmake/modules/AddLLD.cmake | 4 ++-- + lldb/cmake/modules/AddLLDB.cmake | 2 +- + 13 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/bolt/cmake/modules/AddBOLT.cmake b/bolt/cmake/modules/AddBOLT.cmake +index c7ac662c6b12..f5a7e7c01c66 100644 +--- a/bolt/cmake/modules/AddBOLT.cmake ++++ b/bolt/cmake/modules/AddBOLT.cmake +@@ -16,7 +16,7 @@ macro(add_bolt_tool name) + get_target_export_arg(${name} BOLT 
export_to_bolttargets) + install(TARGETS ${name} + ${export_to_bolttargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${BOLT_TOOLS_INSTALL_DIR}" + COMPONENT bolt) + + if(NOT LLVM_ENABLE_IDE) +diff --git a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +index 0d4501d1eac0..a6ff0261f5f3 100644 +--- a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt ++++ b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +@@ -66,6 +66,6 @@ install(PROGRAMS clang-tidy-diff.py + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" + COMPONENT clang-tidy) + install(PROGRAMS run-clang-tidy.py +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-tidy + RENAME run-clang-tidy) +diff --git a/clang-tools-extra/modularize/CMakeLists.txt b/clang-tools-extra/modularize/CMakeLists.txt +index eb5383c3ad44..39a34dfe8c71 100644 +--- a/clang-tools-extra/modularize/CMakeLists.txt ++++ b/clang-tools-extra/modularize/CMakeLists.txt +@@ -27,5 +27,5 @@ clang_target_link_libraries(modularize + ) + + install(TARGETS modularize +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-extras) +diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake +index 4059fc3e986c..2dc34826ba1e 100644 +--- a/clang/cmake/modules/AddClang.cmake ++++ b/clang/cmake/modules/AddClang.cmake +@@ -183,11 +183,11 @@ macro(add_clang_tool name) + get_target_export_arg(${name} Clang export_to_clangtargets) + install(TARGETS ${name} + ${export_to_clangtargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT ${name}) + + if (LLVM_ENABLE_PDB) +- install(FILES $ DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name} OPTIONAL) ++ install(FILES $ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" COMPONENT ${name} OPTIONAL) + endif() + + if(NOT LLVM_ENABLE_IDE) +diff --git 
a/clang/tools/c-index-test/CMakeLists.txt b/clang/tools/c-index-test/CMakeLists.txt +index 24e7c9692ca5..841f49cd5e0b 100644 +--- a/clang/tools/c-index-test/CMakeLists.txt ++++ b/clang/tools/c-index-test/CMakeLists.txt +@@ -48,7 +48,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + set_property(TARGET c-index-test APPEND PROPERTY INSTALL_RPATH + "@executable_path/../../lib") + else() +- set(INSTALL_DESTINATION "${CMAKE_INSTALL_BINDIR}") ++ set(INSTALL_DESTINATION "${CLANG_TOOLS_INSTALL_DIR}") + endif() + + install(TARGETS c-index-test +diff --git a/clang/tools/clang-format/CMakeLists.txt b/clang/tools/clang-format/CMakeLists.txt +index 1c61a3c8fb80..41f019c1fbf9 100644 +--- a/clang/tools/clang-format/CMakeLists.txt ++++ b/clang/tools/clang-format/CMakeLists.txt +@@ -36,11 +36,11 @@ install(FILES clang-format.py + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" + COMPONENT clang-format) + install(PROGRAMS git-clang-format +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-format) + + if (WIN32 AND NOT CYGWIN) + install(PROGRAMS git-clang-format.bat +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-format) + endif() +diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt +index 9273eb5ed977..f9abcb2ca248 100644 +--- a/clang/tools/scan-build-py/CMakeLists.txt ++++ b/clang/tools/scan-build-py/CMakeLists.txt +@@ -43,7 +43,7 @@ foreach(BinFile ${BinFiles}) + ${CMAKE_BINARY_DIR}/bin/scan-build-py + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/scan-build) + install (PROGRAMS "bin/scan-build" +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + RENAME scan-build-py + COMPONENT scan-build-py) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/scan-build-py) +@@ -56,7 +56,7 @@ foreach(BinFile ${BinFiles}) + ${CMAKE_BINARY_DIR}/bin/ + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + install(PROGRAMS 
bin/${BinFile} +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT scan-build-py) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + endif() +diff --git a/clang/tools/scan-build/CMakeLists.txt b/clang/tools/scan-build/CMakeLists.txt +index ef687b0e90a1..47f31efc9174 100644 +--- a/clang/tools/scan-build/CMakeLists.txt ++++ b/clang/tools/scan-build/CMakeLists.txt +@@ -47,7 +47,7 @@ if(CLANG_INSTALL_SCANBUILD) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + install(PROGRAMS bin/${BinFile} +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT scan-build) + endforeach() + +diff --git a/clang/tools/scan-view/CMakeLists.txt b/clang/tools/scan-view/CMakeLists.txt +index 07aec76ee66f..55a945bb278d 100644 +--- a/clang/tools/scan-view/CMakeLists.txt ++++ b/clang/tools/scan-view/CMakeLists.txt +@@ -20,7 +20,7 @@ if(CLANG_INSTALL_SCANVIEW) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + install(PROGRAMS bin/${BinFile} +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT scan-view) + endforeach() + +diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake +index ca233103ccdb..e079f33d9426 100644 +--- a/flang/cmake/modules/AddFlang.cmake ++++ b/flang/cmake/modules/AddFlang.cmake +@@ -122,7 +122,7 @@ macro(add_flang_tool name) + get_target_export_arg(${name} Flang export_to_flangtargets) + install(TARGETS ${name} + ${export_to_flangtargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${FLANG_TOOLS_INSTALL_DIR}" + COMPONENT ${name}) + + if(NOT LLVM_ENABLE_IDE) +diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt +index b5d672702512..67f5d4304dac 100644 +--- a/flang/tools/flang-driver/CMakeLists.txt ++++ 
b/flang/tools/flang-driver/CMakeLists.txt +@@ -43,7 +43,7 @@ if(FLANG_PLUGIN_SUPPORT) + export_executable_symbols_for_plugins(flang) + endif() + +-install(TARGETS flang DESTINATION "${CMAKE_INSTALL_BINDIR}") ++install(TARGETS flang DESTINATION "${FLANG_TOOLS_INSTALL_DIR}") + + # Keep "flang-new" as a symlink for backwards compatiblity. Remove once "flang" + # is a widely adopted name. +diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake +index 37f73afa915f..7df335698aab 100644 +--- a/lld/cmake/modules/AddLLD.cmake ++++ b/lld/cmake/modules/AddLLD.cmake +@@ -55,11 +55,11 @@ macro(add_lld_tool name) + get_target_export_arg(${name} LLD export_to_lldtargets) + install(TARGETS ${name} + ${export_to_lldtargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${LLD_TOOLS_INSTALL_DIR}" + COMPONENT ${name}) + + if (LLVM_ENABLE_PDB) +- install(FILES $ DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name} OPTIONAL) ++ install(FILES $ DESTINATION "${LLD_TOOLS_INSTALL_DIR}" COMPONENT ${name} OPTIONAL) + endif() + + if(NOT CMAKE_CONFIGURATION_TYPES) +diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake +index 28bf8d816d89..372eec626160 100644 +--- a/lldb/cmake/modules/AddLLDB.cmake ++++ b/lldb/cmake/modules/AddLLDB.cmake +@@ -184,7 +184,7 @@ function(add_lldb_executable name) + endif() + + if(ARG_GENERATE_INSTALL) +- set(install_dest bin) ++ set(install_dest "${LLVM_TOOLS_INSTALL_DIR}") + if(ARG_INSTALL_PREFIX) + set(install_dest ${ARG_INSTALL_PREFIX}) + endif() diff --git a/ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch b/ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch new file mode 100644 index 000000000000..568cb17cbb05 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch @@ -0,0 +1,16 @@ + llvm/tools/llvm-config/llvm-config.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp 
+index d5b76b1bb6c1..9fedcb2ab75f 100644 +--- a/llvm/tools/llvm-config/llvm-config.cpp ++++ b/llvm/tools/llvm-config/llvm-config.cpp +@@ -304,7 +304,7 @@ int main(int argc, char **argv) { + // bin dir). + sys::fs::make_absolute(CurrentPath); + CurrentExecPrefix = +- sys::path::parent_path(sys::path::parent_path(CurrentPath)).str(); ++ sys::path::parent_path(sys::path::parent_path(sys::path::parent_path(CurrentPath))).str(); + + // Check to see if we are inside a development tree by comparing to possible + // locations (prefix style or CMake style). diff --git a/ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch b/ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch new file mode 100644 index 000000000000..6736f2a31d53 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch @@ -0,0 +1,32 @@ + openmp/runtime/src/CMakeLists.txt | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt +index bb5822264514..340cef14df89 100644 +--- a/openmp/runtime/src/CMakeLists.txt ++++ b/openmp/runtime/src/CMakeLists.txt +@@ -215,7 +215,7 @@ endif() + set(LIBOMP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) + + # Add symbolic links to libomp +-if(NOT WIN32) ++if(0) + add_custom_command(TARGET omp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink ${LIBOMP_LIB_FILE} + libgomp${LIBOMP_LIBRARY_SUFFIX} +@@ -367,6 +367,7 @@ if(WIN32) + install(TARGETS omp ${export_to_llvmexports} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + install(TARGETS ${LIBOMP_IMP_LIB_TARGET} ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") + # Create aliases (regular copies) of the library for backwards compatibility ++ if(0) + set(LIBOMP_ALIASES "libiomp5md") + foreach(alias IN LISTS LIBOMP_ALIASES) + install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_LIB_FILE}\" +@@ -375,6 +376,7 @@ if(WIN32) + install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E 
copy \"${LIBOMP_IMP_LIB_FILE}\" + \"${alias}${CMAKE_STATIC_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"${outdir}\")") + endforeach() ++ endif() + else() + + install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}") diff --git a/ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch b/ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch new file mode 100644 index 000000000000..0047333171ef --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch @@ -0,0 +1,23 @@ + llvm/runtimes/CMakeLists.txt | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt +index 94a43b96d218..fff91366fbb2 100644 +--- a/llvm/runtimes/CMakeLists.txt ++++ b/llvm/runtimes/CMakeLists.txt +@@ -504,11 +504,13 @@ if(build_runtimes) + # Forward user-provived system configuration to runtimes for requirement introspection. + # CMAKE_PREFIX_PATH is the search path for CMake packages. + if(CMAKE_PREFIX_PATH) +- list(APPEND extra_cmake_args "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}") ++ string(REPLACE ";" "|" new_value "${CMAKE_PREFIX_PATH}") ++ list(APPEND extra_cmake_args "-DCMAKE_PREFIX_PATH=${new_value}") + endif() + # CMAKE_PROGRAM_PATH is the search path for executables such as python. + if(CMAKE_PROGRAM_PATH) +- list(APPEND extra_cmake_args "-DCMAKE_PROGRAM_PATH=${CMAKE_PROGRAM_PATH}") ++ string(REPLACE ";" "|" new_value "${CMAKE_PROGRAM_PATH}") ++ list(APPEND extra_cmake_args "-DCMAKE_PROGRAM_PATH=${new_value}") + endif() + + # TODO: We need to consider passing it as '-DRUNTIMES_x86_64_LLVM_ENABLE_RUNTIMES'. 
diff --git a/ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch b/ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch new file mode 100644 index 000000000000..4950a48c3b5e --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch @@ -0,0 +1,16 @@ + mlir/python/CMakeLists.txt | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt +index 50fbca38a08f..957a6722c21c 100644 +--- a/mlir/python/CMakeLists.txt ++++ b/mlir/python/CMakeLists.txt +@@ -527,6 +527,8 @@ add_mlir_python_common_capi_library(MLIRPythonCAPI + ${_ADDL_TEST_SOURCES} + ) + ++file(MAKE_DIRECTORY "${MLIR_BINARY_DIR}/python_packages/mlir_core/mlir/_mlir_libs") ++ + ################################################################################ + # Custom targets. + ################################################################################ diff --git a/ci/vcpkg/overlay/llvm/clang_usage b/ci/vcpkg/overlay/llvm/clang_usage new file mode 100644 index 000000000000..f239721f484f --- /dev/null +++ b/ci/vcpkg/overlay/llvm/clang_usage @@ -0,0 +1,5 @@ +The package clang provides CMake targets: + + find_package(Clang CONFIG REQUIRED) + target_include_directories(main PRIVATE ${CLANG_INCLUDE_DIRS}) + target_link_libraries(main PRIVATE clangBasic clangLex clangParse clangAST ...) 
diff --git a/ci/vcpkg/overlay/llvm/cmake4.patch b/ci/vcpkg/overlay/llvm/cmake4.patch new file mode 100644 index 000000000000..984981e88f8b --- /dev/null +++ b/ci/vcpkg/overlay/llvm/cmake4.patch @@ -0,0 +1,117 @@ +diff --git a/cmake/Modules/HandleCompilerRT.cmake b/cmake/Modules/HandleCompilerRT.cmake +index 6865f45175..33dda44d18 100644 +--- a/cmake/Modules/HandleCompilerRT.cmake ++++ b/cmake/Modules/HandleCompilerRT.cmake +@@ -20,25 +20,25 @@ function(get_component_name name variable) + if(NOT name MATCHES "builtins.*") + set(component_name "${name}_") + endif() +- if (CMAKE_OSX_SYSROOT MATCHES ".+MacOSX.+") ++ if (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+MacOSX.+") + set(component_name "${component_name}osx") + +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+iPhoneOS.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+iPhoneOS.+") + set(component_name "${component_name}ios") +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+iPhoneSimulator.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+iPhoneSimulator.+") + set(component_name "${component_name}iossim") + +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+AppleTVOS.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+AppleTVOS.+") + set(component_name "${component_name}tvos") +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+AppleTVSimulator.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+AppleTVSimulator.+") + set(component_name "${component_name}tvossim") + +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+WatchOS.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+WatchOS.+") + set(component_name "${component_name}watchos") +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+WatchSimulator.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+WatchSimulator.+") + set(component_name "${component_name}watchossim") + else() +- message(WARNING "Unknown Apple SDK ${CMAKE_OSX_SYSROOT}, we don't know which compiler-rt library suffix to use.") ++ message(WARNING "Unknown Apple SDK ${_CMAKE_OSX_SYSROOT_PATH}, we don't know which compiler-rt library suffix to use.") + endif() + else() + 
set(component_name "${name}") +diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt +index f4722c3b35..a553615ebc 100644 +--- a/libcxxabi/src/CMakeLists.txt ++++ b/libcxxabi/src/CMakeLists.txt +@@ -134,7 +134,7 @@ if ( APPLE ) + # Make sure we link in CrashReporterClient if we find it -- it's used by + # abort() on Apple platforms when building the system dylib. + find_library(CrashReporterClient NAMES libCrashReporterClient.a +- PATHS "${CMAKE_OSX_SYSROOT}/usr/local/lib") ++ PATHS "${_CMAKE_OSX_SYSROOT_PATH}/usr/local/lib") + if (CrashReporterClient) + message(STATUS "Linking with CrashReporterClient at ${CrashReporterClient}") + add_library_flags("${CrashReporterClient}") +diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt +index f0b9756bec..0c784545c4 100644 +--- a/lldb/tools/debugserver/source/CMakeLists.txt ++++ b/lldb/tools/debugserver/source/CMakeLists.txt +@@ -125,17 +125,17 @@ if(APPLE) + set(LIBCOMPRESSION compression) + if(APPLE_EMBEDDED) + find_library(BACKBOARD_LIBRARY BackBoardServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(FRONTBOARD_LIBRARY FrontBoardServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(SPRINGBOARD_LIBRARY SpringBoardServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(MOBILESERVICES_LIBRARY MobileCoreServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(LOCKDOWN_LIBRARY lockdown) + if (APPLE_EMBEDDED STREQUAL "watchos") + find_library(CAROUSELSERVICES_LIBRARY CarouselServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ 
PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + endif() + + if(NOT BACKBOARD_LIBRARY) +@@ -158,7 +158,7 @@ endif() + + add_definitions(-DLLDB_USE_OS_LOG) + +-if(${CMAKE_OSX_SYSROOT} MATCHES ".Internal.sdk$") ++if(${_CMAKE_OSX_SYSROOT_PATH} MATCHES ".Internal.sdk$") + message(STATUS "LLDB debugserver energy support is enabled") + add_definitions(-DLLDB_ENERGY) + set(ENERGY_LIBRARY -lpmenergy -lpmsample) +@@ -181,7 +181,7 @@ endif() + separate_arguments(MIG_ARCH_FLAGS_SEPARTED NATIVE_COMMAND "${MIG_ARCH_FLAGS}") + + add_custom_command(OUTPUT ${generated_mach_interfaces} +- VERBATIM COMMAND mig ${MIG_ARCH_FLAGS_SEPARTED} -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs ++ VERBATIM COMMAND mig ${MIG_ARCH_FLAGS_SEPARTED} -isysroot ${_CMAKE_OSX_SYSROOT_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs + ) + +@@ -305,7 +305,7 @@ if(APPLE_EMBEDDED) + WITH_BKS + ) + set_property(TARGET lldbDebugserverCommon APPEND PROPERTY COMPILE_FLAGS +- -F${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks ++ -F${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks + ) + + add_lldb_library(lldbDebugserverCommon_NonUI ${lldbDebugserverCommonSources}) +diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt +index 6127b76db0..0a810db531 100644 +--- a/llvm/test/CMakeLists.txt ++++ b/llvm/test/CMakeLists.txt +@@ -33,6 +33,7 @@ configure_lit_site_cfg( + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + PATHS ++ "_CMAKE_OSX_SYSROOT_PATH" + "CMAKE_OSX_SYSROOT" + "LLVM_SOURCE_DIR" + "LLVM_BINARY_DIR" diff --git a/ci/vcpkg/overlay/llvm/flang_usage b/ci/vcpkg/overlay/llvm/flang_usage new file mode 100644 index 000000000000..b3cb08592f03 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/flang_usage @@ -0,0 +1,5 @@ +The package flang provides CMake targets: + + find_package(Flang CONFIG REQUIRED) + target_include_directories(main PRIVATE ${FLANG_INCLUDE_DIRS}) + 
target_link_libraries(main PRIVATE flangFrontend flangFrontendTool ...) diff --git a/ci/vcpkg/overlay/llvm/lld_usage b/ci/vcpkg/overlay/llvm/lld_usage new file mode 100644 index 000000000000..92c8d10a0857 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/lld_usage @@ -0,0 +1,5 @@ +The package lld provides CMake targets: + + find_package(LLD CONFIG REQUIRED) + target_include_directories(main PRIVATE ${LLD_INCLUDE_DIRS}) + target_link_libraries(main PRIVATE lldCommon lldCore lldDriver ...) diff --git a/ci/vcpkg/overlay/llvm/llvm_usage b/ci/vcpkg/overlay/llvm/llvm_usage new file mode 100644 index 000000000000..72d57dbfa573 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/llvm_usage @@ -0,0 +1,15 @@ +The package llvm provides CMake targets: + + find_package(LLVM CONFIG REQUIRED) + + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") + include(HandleLLVMOptions) + add_definitions(${LLVM_DEFINITIONS}) + + target_include_directories(main PRIVATE ${LLVM_INCLUDE_DIRS}) + + # Find the libraries that correspond to the LLVM components that we wish to use + llvm_map_components_to_libnames(llvm_libs Support Core IRReader ...) + + # Link against LLVM libraries + target_link_libraries(main PRIVATE ${llvm_libs}) diff --git a/ci/vcpkg/overlay/llvm/mlir_usage b/ci/vcpkg/overlay/llvm/mlir_usage new file mode 100644 index 000000000000..da45a1612ec8 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/mlir_usage @@ -0,0 +1,5 @@ +The package mlir provides CMake targets: + + find_package(MLIR CONFIG REQUIRED) + target_include_directories(main PRIVATE ${MLIR_INCLUDE_DIRS}) + target_link_libraries(main PRIVATE MLIRIR MLIRParser MLIRPass MLIRSupport ...) 
diff --git a/ci/vcpkg/overlay/llvm/portfile.cmake b/ci/vcpkg/overlay/llvm/portfile.cmake new file mode 100644 index 000000000000..b6d5bdacd02c --- /dev/null +++ b/ci/vcpkg/overlay/llvm/portfile.cmake @@ -0,0 +1,367 @@ +# Suppress warning: There should be no installed empty directories +set(VCPKG_POLICY_ALLOW_EMPTY_FOLDERS enabled) + +vcpkg_check_linkage(ONLY_STATIC_LIBRARY) + +# [BOLT] Allow to compile with MSVC (#151189) +vcpkg_download_distfile( + PATCH1_FILE + URLS https://github.com/llvm/llvm-project/commit/497d17737518d417f6411d46aef1334f642ccd81.patch?full_index=1 + SHA512 7bf4d4ee8f72fea5b8094320d1f3a71063ec19fe1b552424182c4140055bf6aacfa9ff64b0bcab0a8d6739e4b6249641f58d19fb6b35e1ada67b66b53776dc1a + FILENAME 497d17737518d417f6411d46aef1334f642ccd81.patch +) + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO llvm/llvm-project + REF "llvmorg-${VERSION}" + SHA512 85d272761253428b648f3d111b7308f8cdee74cceebec9e709126c4555ad1e78c443183ad8eb7319e0a15bafb97868ab5b5a3d86ba64812750c568dbf715d8ec + HEAD_REF main + PATCHES + 0001-fix-install-package-dir.patch + 0002-fix-tools-install-dir.patch + 0003-fix-llvm-config.patch + 0004-disable-libomp-aliases.patch + 0005-fix-runtimes.patch + 0006-create-destination-mlir-directory.patch + "${PATCH1_FILE}" +) + +vcpkg_check_features( + OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + tools LLVM_BUILD_TOOLS + tools LLVM_INCLUDE_TOOLS + utils LLVM_BUILD_UTILS + utils LLVM_INCLUDE_UTILS + utils LLVM_INSTALL_UTILS + enable-assertions LLVM_ENABLE_ASSERTIONS + enable-rtti LLVM_ENABLE_RTTI + enable-ffi LLVM_ENABLE_FFI + enable-eh LLVM_ENABLE_EH + enable-bindings LLVM_ENABLE_BINDINGS + export-symbols LLVM_EXPORT_SYMBOLS_FOR_PLUGINS +) + +vcpkg_cmake_get_vars(cmake_vars_file) +include("${cmake_vars_file}") + +# LLVM generates CMake error due to Visual Studio version 16.4 is known to miscompile part of LLVM. +# LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON disables this error. 
+# See https://developercommunity.visualstudio.com/content/problem/845933/miscompile-boolean-condition-deduced-to-be-always.html +# and thread "[llvm-dev] Longstanding failing tests - clang-tidy, MachO, Polly" on llvm-dev Jan 21-23 2020. +if(VCPKG_DETECTED_CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND VCPKG_DETECTED_MSVC_VERSION LESS "1925") + list(APPEND FEATURE_OPTIONS + -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON + ) +endif() + +# Force enable or disable external libraries +set(llvm_external_libraries + zlib + libxml2 + zstd +) +foreach(external_library IN LISTS llvm_external_libraries) + string(TOLOWER "enable-${external_library}" feature_name) + string(TOUPPER "LLVM_ENABLE_${external_library}" define_name) + if(feature_name IN_LIST FEATURES) + list(APPEND FEATURE_OPTIONS + -D${define_name}=FORCE_ON + ) + else() + list(APPEND FEATURE_OPTIONS + -D${define_name}=OFF + ) + endif() +endforeach() + +# LLVM_ABI_BREAKING_CHECKS can be WITH_ASSERTS (default), FORCE_ON or FORCE_OFF. +# By default in LLVM, abi-breaking checks are enabled if assertions are enabled. +# however, this breaks linking with the debug versions, since the option is +# baked into the header files; thus, we always turn off LLVM_ABI_BREAKING_CHECKS +# unless the user asks for it +if("enable-abi-breaking-checks" IN_LIST FEATURES) + # Force enable abi-breaking checks. + list(APPEND FEATURE_OPTIONS + -DLLVM_ABI_BREAKING_CHECKS=FORCE_ON + ) +else() + # Force disable abi-breaking checks. 
+ list(APPEND FEATURE_OPTIONS + -DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF + ) +endif() + +# All projects: bolt;clang;clang-tools-extra;lld;lldb;mlir;polly +# Extra projects: flang +set(LLVM_ENABLE_PROJECTS) +if("bolt" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "bolt") + list(APPEND FEATURE_OPTIONS + -DBOLT_TOOLS_INSTALL_DIR:PATH=tools/llvm + ) +endif() +if("clang" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "clang") + vcpkg_check_features( + OUT_FEATURE_OPTIONS CLANG_FEATURE_OPTIONS + FEATURES + clang-enable-cir CLANG_ENABLE_CIR + clang-enable-static-analyzer CLANG_ENABLE_STATIC_ANALYZER + ) + string(REGEX MATCH "^[0-9]+" CLANG_VERSION_MAJOR ${VERSION}) + list(APPEND CLANG_FEATURE_OPTIONS + -DCLANG_INSTALL_PACKAGE_DIR:PATH=share/clang + -DCLANG_TOOLS_INSTALL_DIR:PATH=tools/llvm + # 1) LLVM/Clang tools are relocated from ./bin/ to ./tools/llvm/ (CLANG_TOOLS_INSTALL_DIR=tools/llvm) + # 2) Clang resource files should be relocated from lib/clang/ to ../tools/llvm/lib/clang/ + -DCLANG_RESOURCE_DIR=lib/clang/${CLANG_VERSION_MAJOR} + ) +endif() +if("clang-tools-extra" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "clang-tools-extra") +endif() +if("flang" IN_LIST FEATURES) + if(VCPKG_DETECTED_CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND VCPKG_TARGET_ARCHITECTURE STREQUAL "x86") + message(FATAL_ERROR "Building Flang with MSVC is not supported on x86. 
Disable it until issues are fixed.") + endif() + list(APPEND LLVM_ENABLE_PROJECTS "flang") + list(APPEND FEATURE_OPTIONS + -DFLANG_INSTALL_PACKAGE_DIR:PATH=share/flang + -DFLANG_TOOLS_INSTALL_DIR:PATH=tools/llvm + ) + list(APPEND FEATURE_OPTIONS + # Flang requires C++17 + -DCMAKE_CXX_STANDARD=17 + ) +endif() +if("lld" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "lld") + list(APPEND FEATURE_OPTIONS + -DLLD_INSTALL_PACKAGE_DIR:PATH=share/lld + -DLLD_TOOLS_INSTALL_DIR:PATH=tools/llvm + ) +endif() +if("lldb" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "lldb") + list(APPEND FEATURE_OPTIONS + -DLLDB_ENABLE_CURSES=OFF + ) +endif() +if("mlir" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "mlir") + list(APPEND FEATURE_OPTIONS + -DMLIR_INSTALL_PACKAGE_DIR:PATH=share/mlir + -DMLIR_TOOLS_INSTALL_DIR:PATH=tools/llvm + -DMLIR_INSTALL_AGGREGATE_OBJECTS=OFF # Disables installation of object files in lib/objects-{CMAKE_BUILD_TYPE}. + ) + if("enable-mlir-python-bindings" IN_LIST FEATURES) + list(APPEND FEATURE_OPTIONS + -DMLIR_ENABLE_BINDINGS_PYTHON=ON + "-Dpybind11_DIR=${CURRENT_INSTALLED_DIR}/share/pybind11" + ) + endif() +endif() +if("polly" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "polly") + list(APPEND FEATURE_OPTIONS + -DPOLLY_INSTALL_PACKAGE_DIR:PATH=share/polly + ) +endif() + +# Supported runtimes: libc;libclc;libcxx;libcxxabi;libunwind;compiler-rt;openmp;llvm-libgcc;offload;flang-rt +set(LLVM_ENABLE_RUNTIMES) +if("libc" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libc") +endif() +if("libclc" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libclc") +endif() +if("libcxx" IN_LIST FEATURES) + if(VCPKG_DETECTED_CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND VCPKG_DETECTED_MSVC_VERSION LESS "1914") + # libcxx supports being built with clang-cl, but not with MSVC’s cl.exe, as cl doesn’t support the #include_next extension. + # Furthermore, VS 2017 or newer (19.14) is required. 
+ # More info: https://releases.llvm.org/17.0.1/projects/libcxx/docs/BuildingLibcxx.html#support-for-windows + message(FATAL_ERROR "libcxx requires MSVC 19.14 or newer.") + endif() + list(APPEND LLVM_ENABLE_RUNTIMES "libcxx") +endif() +if("libcxxabi" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libcxxabi") +endif() +if("libunwind" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libunwind") +endif() +if("compiler-rt" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "compiler-rt") + vcpkg_check_features( + OUT_FEATURE_OPTIONS COMPILER_RT_FEATURE_OPTIONS + FEATURES + enable-ios COMPILER_RT_ENABLE_IOS + ) +endif() +if("openmp" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "openmp") +endif() + +# this is for normal targets +set(known_llvm_targets + AArch64 + AMDGPU + ARM + AVR + BPF + Hexagon + Lanai + LoongArch + Mips + MSP430 + NVPTX + PowerPC + RISCV + Sparc + SPIRV + SystemZ + VE + WebAssembly + X86 + XCore +) + +set(LLVM_TARGETS_TO_BUILD) +foreach(llvm_target IN LISTS known_llvm_targets) + string(TOLOWER "target-${llvm_target}" feature_name) + if(feature_name IN_LIST FEATURES) + list(APPEND LLVM_TARGETS_TO_BUILD "${llvm_target}") + endif() +endforeach() + +# this is for experimental targets +set(known_llvm_experimental_targets + ARC + CSKY + DirectX + M68k + Xtensa +) + +set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD) +foreach(llvm_target IN LISTS known_llvm_experimental_targets) + string(TOLOWER "target-${llvm_target}" feature_name) + if(feature_name IN_LIST FEATURES) + list(APPEND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "${llvm_target}") + endif() +endforeach() + +vcpkg_find_acquire_program(PYTHON3) +get_filename_component(PYTHON3_DIR "${PYTHON3}" DIRECTORY) +vcpkg_add_to_path(PREPEND "${PYTHON3_DIR}") + +file(REMOVE "${SOURCE_PATH}/llvm/cmake/modules/Findzstd.cmake") + +if("${LLVM_ENABLE_RUNTIMES}" STREQUAL "") + list(APPEND FEATURE_OPTIONS + -DLLVM_INCLUDE_RUNTIMES=OFF + -DLLVM_BUILD_RUNTIMES=OFF + -DLLVM_BUILD_RUNTIME=OFF + ) +endif() + +# At least
one target must be specified, otherwise default to "all". +if("${LLVM_TARGETS_TO_BUILD}" STREQUAL "") + set(LLVM_TARGETS_TO_BUILD "all") +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}/llvm" + OPTIONS + -DLLVM_INCLUDE_EXAMPLES=OFF + -DLLVM_BUILD_EXAMPLES=OFF + -DLLVM_INCLUDE_TESTS=OFF + -DLLVM_BUILD_TESTS=OFF + -DLLVM_INCLUDE_BENCHMARKS=OFF + -DLLVM_BUILD_BENCHMARKS=OFF + # Force TableGen to be built with optimization. This will significantly improve build time. + -DLLVM_OPTIMIZED_TABLEGEN=ON + -DPACKAGE_VERSION=${VERSION} + # Limit the maximum number of concurrent link jobs to 1. This should fix low amount of memory issue for link. + -DLLVM_PARALLEL_LINK_JOBS=1 + -DLLVM_INSTALL_PACKAGE_DIR:PATH=share/llvm + -DLLVM_TOOLS_INSTALL_DIR:PATH=tools/llvm + "-DLLVM_ENABLE_PROJECTS=${LLVM_ENABLE_PROJECTS}" + "-DLLVM_ENABLE_RUNTIMES=${LLVM_ENABLE_RUNTIMES}" + "-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}" + "-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}" + ${FEATURE_OPTIONS} + ${CLANG_FEATURE_OPTIONS} + ${COMPILER_RT_FEATURE_OPTIONS} +) + +vcpkg_cmake_install(ADD_BIN_TO_PATH) + +function(llvm_cmake_package_config_fixup package_name) + cmake_parse_arguments("arg" "DO_NOT_DELETE_PARENT_CONFIG_PATH" "FEATURE_NAME;CONFIG_PATH" "" ${ARGN}) + if(NOT DEFINED arg_FEATURE_NAME) + set(arg_FEATURE_NAME ${package_name}) + endif() + if("${arg_FEATURE_NAME}" STREQUAL "${PORT}" OR "${arg_FEATURE_NAME}" IN_LIST FEATURES) + set(args) + list(APPEND args PACKAGE_NAME "${package_name}") + if(arg_DO_NOT_DELETE_PARENT_CONFIG_PATH) + list(APPEND args "DO_NOT_DELETE_PARENT_CONFIG_PATH") + endif() + if(arg_CONFIG_PATH) + list(APPEND args "CONFIG_PATH" "${arg_CONFIG_PATH}") + endif() + vcpkg_cmake_config_fixup(${args}) + file(INSTALL "${SOURCE_PATH}/${arg_FEATURE_NAME}/LICENSE.TXT" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${package_name}" RENAME copyright) + if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/${package_name}_usage") + file(INSTALL 
"${CMAKE_CURRENT_LIST_DIR}/${package_name}_usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${package_name}" RENAME usage) + endif() + endif() +endfunction() + +llvm_cmake_package_config_fixup("clang" DO_NOT_DELETE_PARENT_CONFIG_PATH) +llvm_cmake_package_config_fixup("flang" DO_NOT_DELETE_PARENT_CONFIG_PATH) +llvm_cmake_package_config_fixup("lld" DO_NOT_DELETE_PARENT_CONFIG_PATH) +llvm_cmake_package_config_fixup("mlir" DO_NOT_DELETE_PARENT_CONFIG_PATH) +llvm_cmake_package_config_fixup("polly" DO_NOT_DELETE_PARENT_CONFIG_PATH) +llvm_cmake_package_config_fixup("llvm") + +if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig") + file(RENAME "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig" "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig") +endif() +if(EXISTS "${CURRENT_PACKAGES_DIR}/share/pkgconfig") + file(RENAME "${CURRENT_PACKAGES_DIR}/share/pkgconfig" "${CURRENT_PACKAGES_DIR}/lib/pkgconfig") +endif() +vcpkg_fixup_pkgconfig() + +vcpkg_copy_tool_dependencies("${CURRENT_PACKAGES_DIR}/tools/${PORT}") + +# Move Clang's runtime libraries from bin/lib to tools/${PORT}/lib +if(EXISTS "${CURRENT_PACKAGES_DIR}/bin/lib") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}") + file(RENAME "${CURRENT_PACKAGES_DIR}/bin/lib" "${CURRENT_PACKAGES_DIR}/tools/${PORT}/lib") +endif() +if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/bin/lib") + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/bin/lib") +endif() + +# Remove debug headers and tools +if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug") + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include" + "${CURRENT_PACKAGES_DIR}/debug/share" + "${CURRENT_PACKAGES_DIR}/debug/tools" + ) +endif() + +# LLVM generates shared libraries in a static build (LLVM-C.dll, libclang.dll, LTO.dll, Remarks.dll, ...) 
+# for the corresponding export targets (used in LLVMExports-.cmake files on the Windows platform) +if(VCPKG_TARGET_IS_WINDOWS) + set(VCPKG_POLICY_DLLS_IN_STATIC_LIBRARY enabled) +else() + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/bin" + "${CURRENT_PACKAGES_DIR}/debug/bin" + ) +endif() \ No newline at end of file diff --git a/ci/vcpkg/overlay/llvm/vcpkg.json b/ci/vcpkg/overlay/llvm/vcpkg.json new file mode 100644 index 000000000000..df3f3bb2b053 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/vcpkg.json @@ -0,0 +1,492 @@ +{ + "name": "llvm", + "version": "21.1.1", + "description": "The LLVM Compiler Infrastructure.", + "homepage": "https://llvm.org", + "license": "Apache-2.0 WITH LLVM-exception", + "supports": "!uwp & !(arm & windows)", + "dependencies": [ + { + "name": "atl", + "platform": "windows & !mingw" + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + }, + { + "name": "vcpkg-cmake-get-vars", + "host": true + } + ], + "default-features": [ + "clang", + "enable-terminfo", + "default-targets", + "enable-bindings", + "enable-zlib", + "enable-zstd", + "lld", + "tools" + ], + "features": { + "bolt": { + "description": "BOLT is a post-link optimizer developed to speed up large applications.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "clang": { + "description": "Include C Language Family Front-end.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "clang-enable-cir": { + "description": "Include ClangIR.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "mlir" + ] + } + ] + }, + "clang-enable-static-analyzer": { + "description": "Include static analyzer in Clang binary.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang" + ] + } + ] + }, + "clang-tools-extra": { + 
"description": "Include Clang tools.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang" + ] + } + ] + }, + "compiler-rt": { + "description": "Include compiler's runtime libraries.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang" + ] + } + ] + }, + "default-targets": { + "description": "Build with platform-specific default targets.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "target-aarch64" + ], + "platform": "arm64" + }, + { + "name": "llvm", + "default-features": false, + "features": [ + "target-x86" + ], + "platform": "x86 | x64" + }, + { + "name": "llvm", + "default-features": false, + "features": [ + "target-arm" + ], + "platform": "arm & !arm64" + } + ] + }, + "enable-abi-breaking-checks": { + "description": "Build LLVM with LLVM_ABI_BREAKING_CHECKS=FORCE_ON." + }, + "enable-assertions": { + "description": "Build LLVM with assertions." + }, + "enable-bindings": { + "description": "Build bindings." + }, + "enable-eh": { + "description": "Build LLVM with exception handler.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "enable-rtti" + ] + } + ] + }, + "enable-ffi": { + "description": "Build LLVM with FFI.", + "dependencies": [ + "libffi" + ] + }, + "enable-ios": { + "description": "Build compiler-rt for iOS SDK.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "target-arm" + ] + } + ] + }, + "enable-libxml2": { + "description": "Build with LibXml2.", + "dependencies": [ + "libxml2" + ] + }, + "enable-mlir-python-bindings": { + "description": "Build MLIR Python bindings.", + "supports": "!(windows & static)", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "mlir" + ] + }, + "pybind11", + "python3" + ] + }, + "enable-rtti": { + "description": "Build LLVM with run-time type information." 
+ }, + "enable-zlib": { + "description": "Build with ZLib.", + "dependencies": [ + "zlib" + ] + }, + "enable-terminfo": { + "description": "Use terminfo database if available." + }, + "enable-zstd": { + "description": "Build with zstd.", + "dependencies": [ + "zstd" + ] + }, + "export-symbols": { + "description": "Export symbols for plugins." + }, + "flang": { + "description": "Include Fortran front end.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "mlir", + "tools" + ] + } + ] + }, + "libc": { + "description": "Include libc library.", + "supports": "linux", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "tools" + ] + } + ] + }, + "libclc": { + "description": "Include OpenCL library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "tools" + ] + } + ] + }, + "libcxx": { + "description": "Include libcxx library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "libcxxabi", + "tools" + ] + } + ] + }, + "libcxxabi": { + "description": "Include libcxxabi library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "libcxx", + "tools" + ] + } + ] + }, + "libunwind": { + "description": "Include libunwind library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "lld": { + "description": "Include LLVM linker.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "lldb": { + "description": "Include LLVM debugger.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "tools" + ] + } + ] + }, + "mlir": { + "description": "Include MLIR (Multi-Level IR Compiler Framework) project.", + "dependencies": [ + { + "name": "llvm", + 
"default-features": false, + "features": [ + "tools", + "utils" + ] + } + ] + }, + "openmp": { + "description": "Include LLVM OpenMP libraries.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "utils" + ] + } + ] + }, + "polly": { + "description": "Include Polly (Polyhedral optimizations for LLVM) project.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools", + "utils" + ] + } + ] + }, + "target-aarch64": { + "description": "Build with AArch64 backend." + }, + "target-all": { + "description": "Build with all backends.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "target-aarch64", + "target-amdgpu", + "target-arc", + "target-arm", + "target-avr", + "target-bpf", + "target-csky", + "target-directx", + "target-hexagon", + "target-lanai", + "target-loongarch", + "target-m68k", + "target-mips", + "target-msp430", + "target-nvptx", + "target-powerpc", + "target-riscv", + "target-sparc", + "target-spirv", + "target-systemz", + "target-ve", + "target-webassembly", + "target-x86", + "target-xcore", + "target-xtensa" + ] + } + ] + }, + "target-amdgpu": { + "description": "Build with AMDGPU backend." + }, + "target-arc": { + "description": "Build with ARC backend (experimental)." + }, + "target-arm": { + "description": "Build with ARM backend." + }, + "target-avr": { + "description": "Build with AVR backend." + }, + "target-bpf": { + "description": "Build with BPF backend." + }, + "target-csky": { + "description": "Build with CSKY backend (experimental)." + }, + "target-directx": { + "description": "Build with DirectX backend (experimental)." + }, + "target-hexagon": { + "description": "Build with Hexagon backend." + }, + "target-lanai": { + "description": "Build with Lanai backend." + }, + "target-loongarch": { + "description": "Build with LoongArch backend." 
+ }, + "target-m68k": { + "description": "Build with M68k backend (experimental)." + }, + "target-mips": { + "description": "Build with Mips backend." + }, + "target-msp430": { + "description": "Build with MSP430 backend." + }, + "target-nvptx": { + "description": "Build with NVPTX backend." + }, + "target-powerpc": { + "description": "Build with PowerPC backend." + }, + "target-riscv": { + "description": "Build with RISC-V backend." + }, + "target-sparc": { + "description": "Build with Sparc backend." + }, + "target-spirv": { + "description": "Build with SPIRV backend." + }, + "target-systemz": { + "description": "Build with SystemZ backend." + }, + "target-ve": { + "description": "Build with VE backend." + }, + "target-webassembly": { + "description": "Build with WebAssembly backend." + }, + "target-x86": { + "description": "Build with X86 backend." + }, + "target-xcore": { + "description": "Build with XCore backend." + }, + "target-xtensa": { + "description": "Build with Xtensa backend (experimental)." + }, + "tools": { + "description": "Build LLVM tools." 
+ }, + "utils": { + "description": "Build LLVM utils.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + } + } +} diff --git a/ci/vcpkg/overlay/symengine/portfile.cmake b/ci/vcpkg/overlay/symengine/portfile.cmake new file mode 100644 index 000000000000..8bb494ec611a --- /dev/null +++ b/ci/vcpkg/overlay/symengine/portfile.cmake @@ -0,0 +1,62 @@ +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO symengine/symengine + REF "v${VERSION}" + SHA512 2b6012ed65064ff81c8828032c5a3148340582274e3604db2a43797ddbaa191520ed97da41efc2e842ba4a25326f53becc51f1e98935e8c34625bc5eaac8397f + HEAD_REF master +) + +vcpkg_check_features( + OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + arb WITH_ARB + flint WITH_FLINT + mpfr WITH_MPFR + tcmalloc WITH_TCMALLOC + llvm WITH_LLVM +) + +if(integer-class-flint IN_LIST FEATURES) + set(INTEGER_CLASS flint) +endif() + +if(VCPKG_TARGET_IS_UWP) + set(VCPKG_C_FLAGS "${VCPKG_C_FLAGS} -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE") + set(VCPKG_CXX_FLAGS "${VCPKG_CXX_FLAGS} -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE") +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DINTEGER_CLASS=${INTEGER_CLASS} + -DBUILD_BENCHMARKS=no + -DBUILD_TESTS=no + -DMSVC_WARNING_LEVEL=3 + -DMSVC_USE_MT=no + -DWITH_SYMENGINE_RCP=yes + -DWITH_SYMENGINE_TEUCHOS=no + -DWITH_SYMENGINE_THREAD_SAFE=yes + ${FEATURE_OPTIONS} +) + +vcpkg_cmake_install() + +if(EXISTS "${CURRENT_PACKAGES_DIR}/CMake") + vcpkg_cmake_config_fixup(CONFIG_PATH CMake) +elseif(EXISTS "${CURRENT_PACKAGES_DIR}/lib/cmake/${PORT}") + vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/${PORT}) +endif() + +vcpkg_copy_pdbs() + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE "${CURRENT_PACKAGES_DIR}/include/symengine/symengine_config_cling.h") + +vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/share/symengine/SymEngineConfig.cmake" "${CURRENT_BUILDTREES_DIR}" "") # not used, inside 
if (False) +vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/share/symengine/SymEngineConfig.cmake" + [[${SYMENGINE_CMAKE_DIR}/../../../include]] + [[${SYMENGINE_CMAKE_DIR}/../../include]] + IGNORE_UNCHANGED +) + +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/ci/vcpkg/overlay/symengine/vcpkg.json b/ci/vcpkg/overlay/symengine/vcpkg.json new file mode 100644 index 000000000000..702d5fd06f37 --- /dev/null +++ b/ci/vcpkg/overlay/symengine/vcpkg.json @@ -0,0 +1,73 @@ +{ + "name": "symengine", + "version": "0.14.0", + "description": "SymEngine is a fast symbolic manipulation library", + "homepage": "https://github.com/symengine/symengine", + "license": "BSD-3-Clause", + "supports": "!uwp", + "dependencies": [ + "boost-math", + "boost-random", + { + "name": "symengine", + "default-features": false, + "features": [ + "integer-class-flint" + ] + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "default-features": [ + "arb", + "llvm", + "mpfr" + ], + "features": { + "arb": { + "description": "Build with arb", + "dependencies": [ + "arb" + ] + }, + "flint": { + "description": "Build with flint", + "dependencies": [ + "flint" + ] + }, + "integer-class-flint": { + "description": "Use flint integer class", + "dependencies": [ + "flint" + ] + }, + "llvm": { + "description": "Build with LLVM", + "dependencies": [ + { + "name": "llvm", + "default-features": false + } + ] + }, + "mpfr": { + "description": "Build with mpfr", + "dependencies": [ + "mpfr" + ] + }, + "tcmalloc": { + "description": "Build with tcmalloc", + "dependencies": [ + "gperftools" + ] + } + } +} diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 58b1382d1ca8..26479bdb8c13 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -78,6 +78,7 @@ { "name": "llvm", "default-features": false, + "version>=": "21.1.1", "features": [ "clang", "default-targets", diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 
6c0c1323645e..2471bbfa2ff7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -160,6 +160,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}") set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") set(ARROW_LLVM_VERSIONS + "21.1" "19.1" "18.1" "17.0" diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index 69f680824b08..139984bc92b6 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -50,8 +50,11 @@ if(NOT LLVM_FOUND) "${ARROW_LLVM_VERSION}") execute_process(COMMAND ${BREW} --prefix "llvm@${ARROW_LLVM_VERSION_MAJOR}" OUTPUT_VARIABLE LLVM_BREW_PREFIX - OUTPUT_STRIP_TRAILING_WHITESPACE) - list(APPEND LLVM_HINTS ${LLVM_BREW_PREFIX}) + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + if(LLVM_BREW_PREFIX) + list(APPEND LLVM_HINTS ${LLVM_BREW_PREFIX}) + endif() endif() endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9dcf4d2c06f0..b3245cf9e70f 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -697,8 +697,7 @@ if(DEFINED ENV{ARROW_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{ARROW_GTEST_URL}") else() set_urls(GTEST_SOURCE_URL - "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" - "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "https://github.com/google/googletest/releases/download/v${ARROW_GTEST_BUILD_VERSION}/googletest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "${THIRDPARTY_MIRROR_URL}/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz") endif() diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 4e47fd293251..0445c37da0a4 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -522,10 +522,10 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { } public: - uint8_t* offsetBuffer; - int64_t offsetCapacity; - uint8_t* 
validityBuffer; - uint8_t* outerValidityBuffer; + uint8_t* offsetBuffer; + int64_t offsetCapacity; + uint8_t* validityBuffer; + uint8_t* outerValidityBuffer; protected: ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) { @@ -533,15 +533,14 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { offsetCapacity = 0; validityBuffer = nullptr; outerValidityBuffer = nullptr; - } ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr mm) : MutableBuffer(data, size, std::move(mm)) { - offsetBuffer = nullptr; - offsetCapacity = 0; - validityBuffer = nullptr; - outerValidityBuffer = nullptr; - } + offsetBuffer = nullptr; + offsetCapacity = 0; + validityBuffer = nullptr; + outerValidityBuffer = nullptr; + } }; /// \defgroup buffer-allocation-functions Functions for allocating buffers diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc index abd5ba6b1a4b..d98315de9a42 100644 --- a/cpp/src/gandiva/annotator.cc +++ b/cpp/src/gandiva/annotator.cc @@ -66,7 +66,8 @@ FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) { child_valid_buffer_ptr_idx = buffer_count_++; } return std::make_shared(field, data_idx, validity_idx, offsets_idx, - data_buffer_ptr_idx, child_offsets_idx, child_valid_buffer_ptr_idx); + data_buffer_ptr_idx, child_offsets_idx, + child_valid_buffer_ptr_idx); } int Annotator::AddHolderPointer(void* holder) { @@ -102,11 +103,11 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf, array_data.child_data.at(0)->offset); - uint8_t* child_valid_buf = reinterpret_cast( - array_data.child_data.at(0)->buffers[0].get()); + uint8_t* child_valid_buf = + reinterpret_cast(array_data.child_data.at(0)->buffers[0].get()); eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, array_data.child_data.at(0)->offset); - + } else { // if list field is input field, just put buffer data into eval batch uint8_t* 
child_offsets_buf = const_cast( @@ -114,18 +115,18 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf, array_data.child_data.at(0)->offset); - uint8_t* child_valid_buf = const_cast( - array_data.child_data.at(0)->buffers[0]->data()); + uint8_t* child_valid_buf = + const_cast(array_data.child_data.at(0)->buffers[0]->data()); eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_valid_buf, array_data.child_data.at(0)->offset); } } if (array_data.type->id() != arrow::Type::LIST || arrow::is_binary_like(array_data.type->field(0)->type()->id())) { - // primitive type list data buffer index is 1 - // binary like type list data buffer index is 2 - ++buffer_idx; - } + // primitive type list data buffer index is 1 + // binary like type list data buffer index is 2 + ++buffer_idx; + } } int const childDataIndex = 0; @@ -133,17 +134,18 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, uint8_t* data_buf = const_cast(array_data.buffers[buffer_idx]->data()); eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset); } else { - uint8_t* data_buf = - const_cast(array_data.child_data.at(childDataIndex)->buffers[buffer_idx]->data()); + uint8_t* data_buf = const_cast( + array_data.child_data.at(childDataIndex)->buffers[buffer_idx]->data()); eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.child_data.at(0)->offset); - + int const childDataBufferIndex = 0; - if (array_data.child_data.at(childDataIndex)->buffers[childDataBufferIndex] ) { - uint8_t* child_valid_buf = const_cast( - array_data.child_data.at(childDataIndex)->buffers[childDataBufferIndex]->data()); - eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, 0); + if (array_data.child_data.at(childDataIndex)->buffers[childDataBufferIndex]) { + uint8_t* child_valid_buf = + const_cast(array_data.child_data.at(childDataIndex) + ->buffers[childDataBufferIndex] + ->data()); + 
eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, 0); } - } if (is_output) { @@ -161,7 +163,6 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, array_data.child_data.at(0)->offset); } } - } EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch, diff --git a/cpp/src/gandiva/array_ops.cc b/cpp/src/gandiva/array_ops.cc index 64548bf09abb..c05fe828f07c 100644 --- a/cpp/src/gandiva/array_ops.cc +++ b/cpp/src/gandiva/array_ops.cc @@ -23,31 +23,32 @@ #include "arrow/util/value_parsing.h" -#include "gandiva/gdv_function_stubs.h" #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" +#include "gandiva/gdv_function_stubs.h" /// Stub functions that can be accessed from LLVM or the pre-compiled library. template -Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - Type remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr) -{ +Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, int32_t entry_len, + const int32_t* entry_validity, bool combined_row_validity, + Type remove_data, bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { std::vector newInts; - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); + const int32_t* entry_validityAdjusted = entry_validity - (loop_var); int64_t validityBitIndex = 0; - //The validity index already has the current row length added to it, so decrement. + // The validity index already has the current row length added to it, so decrement. validityBitIndex = validity_index_var - entry_len; std::vector outValid; for (int i = 0; i < entry_len; i++) { Type entry_item = *(entry_buf + i); if (remove_data_valid && entry_item == remove_data) { - //Do not add the item to remove. 
- } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { + // Do not add the item to remove. + } else if (!arrow::bit_util::GetBit( + reinterpret_cast(entry_validityAdjusted), + validityBitIndex + i)) { outValid.push_back(false); newInts.push_back(0); } else { @@ -58,8 +59,8 @@ Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, *out_len = (int)newInts.size(); - //Since this function can remove values we don't know the length ahead of time. - //A fast way to compute Math.ceil(input / 8.0). + // Since this function can remove values we don't know the length ahead of time. + // A fast way to compute Math.ceil(input / 8.0). int validByteSize = (unsigned int)((*out_len) + 7) >> 3; uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, validByteSize); @@ -68,15 +69,15 @@ Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, } int32_t outBufferLength = (int)*out_len * sizeof(Type); - //length is number of items, but buffers must account for byte size. + // length is number of items, but buffers must account for byte size. uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); memcpy(ret, newInts.data(), outBufferLength); *valid_row = true; - //Return null if the input array is null or the data to remove is null. + // Return null if the input array is null or the data to remove is null. if (!combined_row_validity || !remove_data_valid) { *out_len = 0; - *valid_row = false; //this one is what works for the top level validity. + *valid_row = false; // this one is what works for the top level validity. 
} *valid_ptr = reinterpret_cast(validRet); @@ -84,23 +85,24 @@ Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, } template -bool array_contains_template(const Type* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - Type contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { +bool array_contains_template(const Type* entry_buf, int32_t entry_len, + const int32_t* entry_validity, bool combined_row_validity, + Type contains_data, bool contains_data_valid, + int64_t loop_var, int64_t validity_index_var, + bool* valid_row) { if (!combined_row_validity || !contains_data_valid) { *valid_row = false; return false; } *valid_row = true; - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); + const int32_t* entry_validityAdjusted = entry_validity - (loop_var); int64_t validityBitIndex = validity_index_var - entry_len; - + bool found_null_in_data = false; for (int i = 0; i < entry_len; i++) { - if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { + if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), + validityBitIndex + i)) { found_null_in_data = true; continue; } @@ -109,7 +111,7 @@ bool array_contains_template(const Type* entry_buf, return true; } } - //If there is null in the input and the item is not found the result is null. + // If there is null in the input and the item is not found the result is null. 
if (found_null_in_data) { *valid_row = false; } @@ -119,94 +121,91 @@ bool array_contains_template(const Type* entry_buf, extern "C" { bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } bool array_float32_contains_float32(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float contains_data, bool 
contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, float contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } bool array_float64_contains_float64(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } - - int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr) { - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); 
+ int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } int64_t* array_int64_remove(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } float* array_float32_remove(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, 
float remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } - double* array_float64_remove(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, + int32_t* out_len, int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } } @@ -215,143 +214,155 @@ arrow::Status ExportedArrayFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); - args = {types->i64_type(), // int64_t execution_context - types->i64_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i32_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. 
- types->i1_ptr_type() //output validity for the row - }; + args = { + types->i64_type(), // int64_t execution_context + types->i64_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i32_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type() // output validity for the row + }; engine->AddGlobalMappingForFunc("array_int32_contains_int32", types->i1_type() /*return_type*/, args, reinterpret_cast(array_int32_contains_int32)); - args = {types->i64_type(), // int64_t execution_context - types->i64_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i64_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; + args = { + types->i64_type(), // int64_t execution_context + types->i64_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i64_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
+ types->i1_ptr_type() // output validity for the row + }; engine->AddGlobalMappingForFunc("array_int64_contains_int64", types->i1_type() /*return_type*/, args, reinterpret_cast(array_int64_contains_int64)); - args = {types->i64_type(), // int64_t execution_context - types->float_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->float_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; - - engine->AddGlobalMappingForFunc("array_float32_contains_float32", - types->i1_type() /*return_type*/, args, - reinterpret_cast(array_float32_contains_float32)); - - args = {types->i64_type(), // int64_t execution_context - types->double_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->double_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; - - engine->AddGlobalMappingForFunc("array_float64_contains_float64", - types->i1_type() /*return_type*/, args, - reinterpret_cast(array_float64_contains_float64)); - //Array remove. - args = {types->i64_type(), // int64_t execution_context - types->i32_ptr_type(), // int8_t* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i32_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? 
- types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - engine->AddGlobalMappingForFunc("array_int32_remove", - types->i32_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->float_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->float_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type() // output validity for the row + }; + + engine->AddGlobalMappingForFunc( + "array_float32_contains_float32", types->i1_type() /*return_type*/, args, + reinterpret_cast(array_float32_contains_float32)); + + args = { + types->i64_type(), // int64_t execution_context + types->double_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->double_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type() // output validity for the row + }; + + engine->AddGlobalMappingForFunc( + "array_float64_contains_float64", types->i1_type() /*return_type*/, args, + reinterpret_cast(array_float64_contains_float64)); + // Array remove. 
+ args = { + types->i64_type(), // int64_t execution_context + types->i32_ptr_type(), // int8_t* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i32_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + engine->AddGlobalMappingForFunc("array_int32_remove", types->i32_ptr_type(), args, reinterpret_cast(array_int32_remove)); - args = {types->i64_type(), // int64_t execution_context - types->i64_ptr_type(), // int8_t* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i64_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - - engine->AddGlobalMappingForFunc("array_int64_remove", - types->i64_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->i64_ptr_type(), // int8_t* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i64_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? 
+ types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + + engine->AddGlobalMappingForFunc("array_int64_remove", types->i64_ptr_type(), args, reinterpret_cast(array_int64_remove)); - args = {types->i64_type(), // int64_t execution_context - types->float_ptr_type(), // float* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->float_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - - engine->AddGlobalMappingForFunc("array_float32_remove", - types->float_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->float_ptr_type(), // float* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->float_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
+ types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + + engine->AddGlobalMappingForFunc("array_float32_remove", types->float_ptr_type(), args, reinterpret_cast(array_float32_remove)); - args = {types->i64_type(), // int64_t execution_context - types->double_ptr_type(), // int8_t* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->double_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - - engine->AddGlobalMappingForFunc("array_float64_remove", - types->double_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->double_ptr_type(), // int8_t* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->double_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
+ types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + + engine->AddGlobalMappingForFunc("array_float64_remove", types->double_ptr_type(), args, reinterpret_cast(array_float64_remove)); return arrow::Status::OK(); } diff --git a/cpp/src/gandiva/array_ops.h b/cpp/src/gandiva/array_ops.h index c0de72a39472..9b7d1d93b2be 100644 --- a/cpp/src/gandiva/array_ops.h +++ b/cpp/src/gandiva/array_ops.h @@ -30,57 +30,60 @@ extern "C" { GANDIVA_EXPORT bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); GANDIVA_EXPORT bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); GANDIVA_EXPORT bool array_float32_contains_float32(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, float contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); 
GANDIVA_EXPORT bool array_float64_contains_float64(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); GANDIVA_EXPORT int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr); GANDIVA_EXPORT int64_t* array_int64_remove(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr); GANDIVA_EXPORT float* array_float32_remove(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, 
float remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr); GANDIVA_EXPORT double* array_float64_remove(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); - + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, + int32_t* out_len, int32_t** valid_ptr); } diff --git a/cpp/src/gandiva/array_ops_test.cc b/cpp/src/gandiva/array_ops_test.cc index bf01c1fe0a09..9732482b42ce 100644 --- a/cpp/src/gandiva/array_ops_test.cc +++ b/cpp/src/gandiva/array_ops_test.cc @@ -32,10 +32,9 @@ TEST(TestArrayOps, TestInt32ContainsInt32) { int32_t entry_validity = 15; bool valid = false; - EXPECT_EQ( - array_int32_contains_int32(ctx_ptr, data, entry_offsets_len, &entry_validity, - true, contains_data, true, 0, 3, &valid), - true); + EXPECT_EQ(array_int32_contains_int32(ctx_ptr, data, entry_offsets_len, &entry_validity, + true, contains_data, true, 0, 3, &valid), + true); } } // namespace gandiva diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index fad1c54ba9f3..70f61c06ebdc 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -16,23 +16,21 @@ // under the License. 
#include "gandiva/encrypt_mode_dispatcher.h" -#include "gandiva/encrypt_utils_ecb.h" -#include "gandiva/encrypt_utils_cbc.h" -#include "gandiva/encrypt_utils_gcm.h" -#include "arrow/util/string.h" -#include #include #include +#include #include +#include "arrow/util/string.h" +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_ecb.h" +#include "gandiva/encrypt_utils_gcm.h" namespace gandiva { // Supported encryption modes static const std::vector SUPPORTED_MODES = { - AES_ECB_MODE, AES_ECB_PKCS7_MODE, AES_ECB_NONE_MODE, - AES_CBC_MODE, AES_CBC_PKCS7_MODE, AES_CBC_NONE_MODE, - AES_GCM_MODE -}; + AES_ECB_MODE, AES_ECB_PKCS7_MODE, AES_ECB_NONE_MODE, AES_CBC_MODE, + AES_CBC_PKCS7_MODE, AES_CBC_NONE_MODE, AES_GCM_MODE}; enum class EncryptionMode { ECB, @@ -56,13 +54,13 @@ EncryptionMode ParseEncryptionMode(std::string_view mode_str) { return EncryptionMode::UNKNOWN; } -int32_t EncryptModeDispatcher::encrypt( - const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* mode, int32_t mode_len, const char* iv, - int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* cipher) { - std::string mode_str = - arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); +int32_t EncryptModeDispatcher::encrypt(const char* plaintext, int32_t plaintext_len, + const char* key, int32_t key_len, const char* mode, + int32_t mode_len, const char* iv, int32_t iv_len, + const char* fifth_argument, + int32_t fifth_argument_len, + unsigned char* cipher) { + std::string mode_str = arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: @@ -75,15 +73,15 @@ int32_t EncryptModeDispatcher::encrypt( case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 - return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, - iv, iv_len, true, cipher); + return 
aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, true, + cipher); case EncryptionMode::CBC_NONE: // CBC without padding - return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, - iv, iv_len, false, cipher); + return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, false, + cipher); case EncryptionMode::GCM: - return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, - iv, iv_len, fifth_argument, fifth_argument_len, cipher); + return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, + fifth_argument, fifth_argument_len, cipher); case EncryptionMode::UNKNOWN: default: { std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); @@ -95,13 +93,13 @@ int32_t EncryptModeDispatcher::encrypt( } } -int32_t EncryptModeDispatcher::decrypt( - const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* mode, int32_t mode_len, const char* iv, - int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* plaintext) { - std::string mode_str = - arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); +int32_t EncryptModeDispatcher::decrypt(const char* ciphertext, int32_t ciphertext_len, + const char* key, int32_t key_len, const char* mode, + int32_t mode_len, const char* iv, int32_t iv_len, + const char* fifth_argument, + int32_t fifth_argument_len, + unsigned char* plaintext) { + std::string mode_str = arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: @@ -114,15 +112,15 @@ int32_t EncryptModeDispatcher::decrypt( case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 - return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, - iv, iv_len, true, plaintext); + return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, true, + plaintext); case 
EncryptionMode::CBC_NONE: // CBC without padding - return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, - iv, iv_len, false, plaintext); + return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, false, + plaintext); case EncryptionMode::GCM: - return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, - iv, iv_len, fifth_argument, fifth_argument_len, plaintext); + return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, iv, iv_len, + fifth_argument, fifth_argument_len, plaintext); case EncryptionMode::UNKNOWN: default: { std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); @@ -135,4 +133,3 @@ int32_t EncryptModeDispatcher::decrypt( } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h index 20326845bd02..b4f7c5907a5f 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.h +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -45,12 +45,10 @@ class EncryptModeDispatcher { * @return Length of encrypted data in bytes * @throws std::runtime_error on encryption failure or unsupported mode */ - static int32_t encrypt(const char* plaintext, int32_t plaintext_len, - const char* key, int32_t key_len, - const char* mode, int32_t mode_len, - const char* iv, int32_t iv_len, - const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* cipher); + static int32_t encrypt(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* mode, int32_t mode_len, + const char* iv, int32_t iv_len, const char* fifth_argument, + int32_t fifth_argument_len, unsigned char* cipher); /** * Decrypt data using the specified mode @@ -69,15 +67,12 @@ class EncryptModeDispatcher { * @return Length of decrypted data in bytes * @throws std::runtime_error on decryption failure or unsupported mode */ - static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, - const char* key, int32_t key_len, - const char* 
mode, int32_t mode_len, - const char* iv, int32_t iv_len, - const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* plaintext); + static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* mode, int32_t mode_len, + const char* iv, int32_t iv_len, const char* fifth_argument, + int32_t fifth_argument_len, unsigned char* plaintext); }; } // namespace gandiva #endif // GANDIVA_ENCRYPT_MODE_DISPATCHER_H - diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc index 04eb60c96a77..e8f997e9e397 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -16,13 +16,13 @@ // under the License. #include "gandiva/encrypt_utils_cbc.h" -#include "gandiva/encrypt_utils_common.h" #include #include -#include +#include #include #include -#include +#include +#include "gandiva/encrypt_utils_common.h" namespace gandiva { @@ -49,8 +49,8 @@ const EVP_CIPHER* get_cbc_cipher_algo(int32_t key_length) { GANDIVA_EXPORT int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* cipher) { + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* cipher) { // Validate IV length if (iv_len != 16) { std::ostringstream oss; @@ -108,8 +108,8 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char GANDIVA_EXPORT int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* plaintext) { + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* plaintext) { // Validate IV length if (iv_len != 16) { std::ostringstream oss; @@ -166,4 +166,3 @@ int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const ch } } // namespace gandiva - diff 
--git a/cpp/src/gandiva/encrypt_utils_cbc.h b/cpp/src/gandiva/encrypt_utils_cbc.h index b083d6f0a2de..9ac26eafbfa0 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.h +++ b/cpp/src/gandiva/encrypt_utils_cbc.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include "gandiva/visibility.h" namespace gandiva { @@ -44,8 +44,8 @@ constexpr const char* AES_CBC_NONE_MODE = "AES-CBC-NONE"; */ GANDIVA_EXPORT int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* cipher); + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* cipher); /** * Decrypt data using AES-CBC algorithm with explicit padding mode @@ -63,8 +63,7 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char */ GANDIVA_EXPORT int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* plaintext); + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* plaintext); } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_cbc_test.cc b/cpp/src/gandiva/encrypt_utils_cbc_test.cc index 8bf9227d65b4..6891ec0f4680 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc_test.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc_test.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_cbc.h" -#include #include +#include #include // Test PKCS#7 padding with 16-byte key @@ -36,9 +36,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { iv, iv_len, true, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - true, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, 
to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -59,9 +59,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { iv, iv_len, true, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - true, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -82,9 +82,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { iv, iv_len, true, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - true, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -105,9 +105,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { iv, iv_len, false, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - false, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, false, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -125,8 +125,8 @@ TEST(TestAesCbcEncryptUtils, TestInvalidIVLength) { unsigned char cipher[64]; try { - gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, - iv, iv_len, true, cipher); + gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, + cipher); FAIL() << "Expected std::runtime_error"; } catch (const 
std::runtime_error& e) { EXPECT_THAT(e.what(), testing::HasSubstr("Invalid IV length for AES-CBC")); @@ -145,13 +145,10 @@ TEST(TestAesCbcEncryptUtils, TestInvalidKeyLength) { unsigned char cipher[64]; try { - gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, - iv, iv_len, true, cipher); + gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, + cipher); FAIL() << "Expected std::runtime_error"; } catch (const std::runtime_error& e) { EXPECT_THAT(e.what(), testing::HasSubstr("Unsupported key length for AES-CBC")); } } - - - diff --git a/cpp/src/gandiva/encrypt_utils_common.cc b/cpp/src/gandiva/encrypt_utils_common.cc index 3213e0c6e1a1..73210037c0f1 100644 --- a/cpp/src/gandiva/encrypt_utils_common.cc +++ b/cpp/src/gandiva/encrypt_utils_common.cc @@ -17,18 +17,17 @@ #include "gandiva/encrypt_utils_common.h" #include -#include #include +#include namespace gandiva { std::string get_openssl_error_string() { std::string error_string; - unsigned long error_code; char error_buffer[256]; // Loop through all errors in the queue - while ((error_code = ERR_get_error()) != 0) { + for (auto error_code = ERR_get_error(); error_code != 0; error_code = ERR_get_error()) { if (!error_string.empty()) { error_string += "; "; } @@ -43,4 +42,3 @@ std::string get_openssl_error_string() { } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_common.h b/cpp/src/gandiva/encrypt_utils_common.h index 62dc14db348e..747f13963a2e 100644 --- a/cpp/src/gandiva/encrypt_utils_common.h +++ b/cpp/src/gandiva/encrypt_utils_common.h @@ -24,12 +24,13 @@ namespace gandiva { /// @brief Get a human-readable error string from OpenSSL's error queue. /// @details Retrieves all errors from the OpenSSL error queue and concatenates them -/// with "; " as a separator. This ensures complete error information is captured. -/// @return A string describing all OpenSSL errors in the queue, or "Unknown OpenSSL error" +/// with "; " as a separator. 
This ensures complete error information is +/// captured. +/// @return A string describing all OpenSSL errors in the queue, or "Unknown OpenSSL +/// error" /// if no error is available. std::string get_openssl_error_string(); } // namespace gandiva #endif // GANDIVA_ENCRYPT_UTILS_COMMON_H - diff --git a/cpp/src/gandiva/encrypt_utils_common_test.cc b/cpp/src/gandiva/encrypt_utils_common_test.cc index de55758d5377..5161d4afeecb 100644 --- a/cpp/src/gandiva/encrypt_utils_common_test.cc +++ b/cpp/src/gandiva/encrypt_utils_common_test.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_common.h" -#include #include +#include #include #include @@ -92,4 +92,3 @@ TEST(TestOpenSSLErrorUtils, TestErrorQueueDrained) { EXPECT_EQ(second_call, "Unknown OpenSSL error"); } - diff --git a/cpp/src/gandiva/encrypt_utils_ecb.cc b/cpp/src/gandiva/encrypt_utils_ecb.cc index b4913e1c8802..662a3a0986ef 100644 --- a/cpp/src/gandiva/encrypt_utils_ecb.cc +++ b/cpp/src/gandiva/encrypt_utils_ecb.cc @@ -16,12 +16,12 @@ // under the License. 
#include "gandiva/encrypt_utils_ecb.h" -#include "gandiva/encrypt_utils_common.h" #include #include -#include #include #include +#include +#include "gandiva/encrypt_utils_common.h" namespace gandiva { @@ -145,4 +145,3 @@ int32_t aes_decrypt_ecb(const char* ciphertext, int32_t ciphertext_len, const ch } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_ecb.h b/cpp/src/gandiva/encrypt_utils_ecb.h index ba62bf3bea9a..af4a7a7c85bd 100644 --- a/cpp/src/gandiva/encrypt_utils_ecb.h +++ b/cpp/src/gandiva/encrypt_utils_ecb.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include "gandiva/visibility.h" namespace gandiva { @@ -67,4 +67,3 @@ int32_t aes_decrypt_ecb(const char* ciphertext, int32_t ciphertext_len, const ch int32_t key_len, bool use_padding, unsigned char* plaintext); } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_ecb_test.cc b/cpp/src/gandiva/encrypt_utils_ecb_test.cc index 52687a8a4a9d..1245b397c6dc 100644 --- a/cpp/src/gandiva/encrypt_utils_ecb_test.cc +++ b/cpp/src/gandiva/encrypt_utils_ecb_test.cc @@ -30,11 +30,13 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_1[64]; - int32_t cipher_1_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_1); + int32_t cipher_1_len = + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_1); unsigned char decrypted_1[64]; - int32_t decrypted_1_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_1), - cipher_1_len, key, key_len, true, decrypted_1); + int32_t decrypted_1_len = + gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_1), cipher_1_len, key, + key_len, true, decrypted_1); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_1), decrypted_1_len)); @@ -48,11 +50,13 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); 
unsigned char cipher_2[64]; - int32_t cipher_2_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_2); + int32_t cipher_2_len = + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_2); unsigned char decrypted_2[64]; - int32_t decrypted_2_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_2), - cipher_2_len, key, key_len, true, decrypted_2); + int32_t decrypted_2_len = + gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_2), cipher_2_len, key, + key_len, true, decrypted_2); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_2), decrypted_2_len)); @@ -66,18 +70,21 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_3[64]; - int32_t cipher_3_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_3); + int32_t cipher_3_len = + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_3); unsigned char decrypted_3[64]; - int32_t decrypted_3_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_3), - cipher_3_len, key, key_len, true, decrypted_3); + int32_t decrypted_3_len = + gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_3), cipher_3_len, key, + key_len, true, decrypted_3); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_3), decrypted_3_len)); // check exception char cipher[64] = "JBB7oJAQuqhDCx01fvBRi8PcljW1+nbnOSMk+R0Sz7E=="; - int32_t cipher_len = static_cast(strlen(reinterpret_cast(cipher))); + int32_t cipher_len = + static_cast(strlen(reinterpret_cast(cipher))); unsigned char plain_text[64]; key = "12345678abcdefgh12345678abcdefgh12345678abcdefgh12345678abcdefgh"; @@ -87,13 +94,16 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { to_encrypt_len = static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_4[64]; - ASSERT_THROW({ - 
gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_4); - }, std::runtime_error); + ASSERT_THROW( + { + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, + cipher_4); + }, + std::runtime_error); - ASSERT_THROW({ - gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); - }, std::runtime_error); + ASSERT_THROW( + { gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); }, + std::runtime_error); key = "12345678"; to_encrypt = "New\ntest\nstring"; @@ -102,11 +112,13 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { to_encrypt_len = static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_5[64]; - ASSERT_THROW({ - gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_5); - }, std::runtime_error); - ASSERT_THROW({ - gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); - }, std::runtime_error); + ASSERT_THROW( + { + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, + cipher_5); + }, + std::runtime_error); + ASSERT_THROW( + { gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); }, + std::runtime_error); } - diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index f028243da590..fa8600553c3d 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -16,12 +16,12 @@ // under the License. 
#include "gandiva/encrypt_utils_gcm.h" -#include "gandiva/encrypt_utils_common.h" #include #include -#include #include #include +#include +#include "gandiva/encrypt_utils_common.h" namespace gandiva { @@ -47,10 +47,9 @@ const EVP_CIPHER* get_gcm_cipher_algo(int32_t key_length) { } // namespace GANDIVA_EXPORT -int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, - const char* key, int32_t key_len, const char* iv, - int32_t iv_len, const char* aad, int32_t aad_len, - unsigned char* cipher) { +int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* cipher) { if (iv_len <= 0) { throw std::runtime_error( "Invalid IV length for AES-GCM: IV length must be greater than 0"); @@ -125,10 +124,9 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, } GANDIVA_EXPORT -int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, - const char* key, int32_t key_len, const char* iv, - int32_t iv_len, const char* aad, int32_t aad_len, - unsigned char* plaintext) { +int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* plaintext) { if (iv_len <= 0) { throw std::runtime_error( "Invalid IV length for AES-GCM: IV length must be greater than 0"); @@ -211,4 +209,3 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_gcm.h b/cpp/src/gandiva/encrypt_utils_gcm.h index 07a597af0b6c..3c315928003c 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.h +++ b/cpp/src/gandiva/encrypt_utils_gcm.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include "gandiva/visibility.h" namespace gandiva { @@ -40,14 +40,15 @@ constexpr int32_t GCM_TAG_LENGTH = 16; * @param iv_len Length of IV in bytes * 
@param aad Optional additional authenticated data (can be null) * @param aad_len Length of AAD in bytes (0 if aad is null) - * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 16 bytes) + * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 16 + * bytes) * @return Length of encrypted data in bytes (plaintext_len + 16 for the tag) * @throws std::runtime_error on encryption failure or invalid parameters */ GANDIVA_EXPORT int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - const char* aad, int32_t aad_len, unsigned char* cipher); + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* cipher); /** * Decrypt data using AES-GCM algorithm @@ -62,12 +63,12 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char * @param aad_len Length of AAD in bytes (0 if aad is null) * @param plaintext Output buffer for decrypted data * @return Length of decrypted data in bytes (ciphertext_len - 16) - * @throws std::runtime_error on decryption failure, invalid parameters, or tag verification failure + * @throws std::runtime_error on decryption failure, invalid parameters, or tag + * verification failure */ GANDIVA_EXPORT int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - const char* aad, int32_t aad_len, unsigned char* plaintext); + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* plaintext); } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_gcm_test.cc b/cpp/src/gandiva/encrypt_utils_gcm_test.cc index 2156132bc628..05e472f31446 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm_test.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm_test.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_gcm.h" -#include #include 
+#include #include // Test IV-only GCM with 16-byte key @@ -39,9 +39,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { EXPECT_EQ(cipher_len, to_encrypt_len + 16); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -66,9 +66,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { EXPECT_EQ(cipher_len, to_encrypt_len + 16); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - aad, aad_len, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, aad, aad_len, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -89,9 +89,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { iv, iv_len, nullptr, 0, cipher); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -112,9 +112,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { iv, iv_len, nullptr, 0, cipher); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted); + int32_t 
decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -138,9 +138,8 @@ TEST(TestAesGcmEncryptUtils, TestTagVerificationFailure) { cipher[cipher_len - 1] ^= 0xFF; unsigned char decrypted[128]; - EXPECT_THROW(gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted), + EXPECT_THROW(gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, + key, key_len, iv, iv_len, nullptr, 0, decrypted), std::runtime_error); } @@ -155,8 +154,7 @@ TEST(TestAesGcmEncryptUtils, TestInvalidIvLength) { auto to_encrypt_len = static_cast(strlen(to_encrypt)); unsigned char cipher[128]; - EXPECT_THROW(gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, - iv, iv_len, nullptr, 0, cipher), + EXPECT_THROW(gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, + iv_len, nullptr, 0, cipher), std::runtime_error); } - diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 42316dce8d00..c7ef2a7f4f70 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -198,7 +198,7 @@ Status Engine::Make(const std::shared_ptr& conf, bool cached, #else using CodeGenOptLevel = llvm::CodeGenOpt::Level; #endif - auto const opt_level = + auto const opt_level = conf->optimize() ? 
CodeGenOptLevel::Aggressive : CodeGenOptLevel::None; // Note that the lifetime of the error string is not captured by the diff --git a/cpp/src/gandiva/field_descriptor.h b/cpp/src/gandiva/field_descriptor.h index dfcf6872d501..7eb6e5822efd 100644 --- a/cpp/src/gandiva/field_descriptor.h +++ b/cpp/src/gandiva/field_descriptor.h @@ -31,15 +31,15 @@ class FieldDescriptor { FieldDescriptor(FieldPtr field, int data_idx, int validity_idx = kInvalidIdx, int offsets_idx = kInvalidIdx, int data_buffer_ptr_idx = kInvalidIdx, - int child_offsets_idx = kInvalidIdx, int child_validity_idx = kInvalidIdx) + int child_offsets_idx = kInvalidIdx, + int child_validity_idx = kInvalidIdx) : field_(field), data_idx_(data_idx), validity_idx_(validity_idx), offsets_idx_(offsets_idx), data_buffer_ptr_idx_(data_buffer_ptr_idx), child_offsets_idx_(child_offsets_idx), - child_validity_idx_(child_validity_idx) { - } + child_validity_idx_(child_validity_idx) {} /// Index of validity array in the array-of-buffers int validity_idx() const { return validity_idx_; } @@ -56,9 +56,7 @@ class FieldDescriptor { /// Index of list type child data offsets int child_data_offsets_idx() const { return child_offsets_idx_; } int child_data_validity_idx() const { return child_validity_idx_; } - void set_child_data_validity_idx(int val) { - child_validity_idx_ = val; - } + void set_child_data_validity_idx(int val) { child_validity_idx_ = val; } FieldPtr field() const { return field_; } const std::string& Name() const { return field_->name(); } diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index a9051a244c73..f1aefb921d14 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -17,7 +17,6 @@ #include "gandiva/function_registry.h" - #include #include #include @@ -146,7 +145,8 @@ arrow::Result> MakeDefaultFunctionRegistry() { for (auto const& funcs : {GetArithmeticFunctionRegistry(), GetDateTimeFunctionRegistry(), 
GetHashFunctionRegistry(), GetMathOpsFunctionRegistry(), - GetStringFunctionRegistry(), GetDateTimeArithmeticFunctionRegistry(), GetArrayFunctionRegistry()}) { + GetStringFunctionRegistry(), GetDateTimeArithmeticFunctionRegistry(), + GetArrayFunctionRegistry()}) { for (auto const& func_signature : funcs) { ARROW_RETURN_NOT_OK(registry->Add(func_signature)); } diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 7750421360e3..bb910843632c 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -514,21 +514,23 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv (e.g. CBC mode) - NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_4args", + NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_encrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_4args", + NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_decrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv, fifth_argument (e.g. 
GCM mode) - NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_5args", + NativeFunction("encrypt", {}, + DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_encrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_5args", + NativeFunction("decrypt", {}, + DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_decrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("mask_first_n", {}, DataTypeVector{utf8(), int32()}, utf8(), diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index a33483e8a002..166f8874a806 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -30,9 +30,9 @@ #include "arrow/util/double_conversion.h" #include "arrow/util/value_parsing.h" -#include "gandiva/encrypt_utils_ecb.h" -#include "gandiva/encrypt_utils_cbc.h" #include "gandiva/encrypt_mode_dispatcher.h" +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_ecb.h" #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" #include "gandiva/in_holder.h" @@ -164,29 +164,31 @@ int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr, } /// Stub functions that can be accessed from LLVM or the pre-compiled library. 
-#define POPULATE_NUMERIC_LIST_TYPE_VECTOR(TYPE, SCALE) \ - int32_t gdv_fn_populate_list_##TYPE##_vector(int64_t context_ptr, int8_t* data_ptr, \ - int32_t* offsets, int64_t slot, \ - TYPE* entry_buf, int32_t entry_len, int32_t** valid_ptr) { \ - auto buffer = reinterpret_cast(data_ptr); \ - int32_t offset = static_cast(buffer->size()); \ - auto status = buffer->Resize(offset + entry_len * SCALE, false /*shrink*/); \ - if (!status.ok()) { \ - gandiva::ExecutionContext* context = \ - reinterpret_cast(context_ptr); \ - context->set_error_msg(status.message().c_str()); \ - return -1; \ - } \ - memcpy(buffer->mutable_data() + offset, (char*)entry_buf, entry_len * SCALE); \ - int validbitIndex = offset / SCALE; \ - for (int i = 0; i < entry_len; i++) { \ - arrow::bit_util::SetBitTo(buffer->validityBuffer, validbitIndex + i, arrow::bit_util::GetBit(reinterpret_cast(valid_ptr), i)); \ - } \ - offsets = reinterpret_cast(buffer->offsetBuffer); \ - offsets[slot] = offset / SCALE; \ - offsets[slot + 1] = offset / SCALE + entry_len; \ +#define POPULATE_NUMERIC_LIST_TYPE_VECTOR(TYPE, SCALE) \ + int32_t gdv_fn_populate_list_##TYPE##_vector( \ + int64_t context_ptr, int8_t* data_ptr, int32_t* offsets, int64_t slot, \ + TYPE* entry_buf, int32_t entry_len, int32_t** valid_ptr) { \ + auto buffer = reinterpret_cast(data_ptr); \ + int32_t offset = static_cast(buffer->size()); \ + auto status = buffer->Resize(offset + entry_len * SCALE, false /*shrink*/); \ + if (!status.ok()) { \ + gandiva::ExecutionContext* context = \ + reinterpret_cast(context_ptr); \ + context->set_error_msg(status.message().c_str()); \ + return -1; \ + } \ + memcpy(buffer->mutable_data() + offset, (char*)entry_buf, entry_len * SCALE); \ + int validbitIndex = offset / SCALE; \ + for (int i = 0; i < entry_len; i++) { \ + arrow::bit_util::SetBitTo( \ + buffer->validityBuffer, validbitIndex + i, \ + arrow::bit_util::GetBit(reinterpret_cast(valid_ptr), i)); \ + } \ + offsets = reinterpret_cast(buffer->offsetBuffer); \ 
+ offsets[slot] = offset / SCALE; \ + offsets[slot + 1] = offset / SCALE + entry_len; \ return 0; \ - }\ + } POPULATE_NUMERIC_LIST_TYPE_VECTOR(int32_t, 4) POPULATE_NUMERIC_LIST_TYPE_VECTOR(int64_t, 8) @@ -398,8 +400,6 @@ CAST_NUMERIC_FROM_VARBINARY(double, arrow::DoubleType, FLOAT8) #undef GDV_FN_CAST_VARCHAR_INTEGER #undef GDV_FN_CAST_VARCHAR_REAL - - GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, int32_t data_len, int32_t n_to_mask, @@ -858,12 +858,9 @@ namespace gandiva { // This is called by the LLVM engine with string calling convention // WARNING: This function is for backward compatibility only. Encrypted binary // data is not guaranteed to be valid UTF-8. Use binary signatures for new code. -extern "C" GANDIVA_EXPORT -const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len) { +extern "C" GANDIVA_EXPORT const char* gdv_fn_aes_encrypt_ecb_legacy( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len) { // Delegate to the core implementation with ECB mode // This function is ECB-only, so we enforce the mode const char* mode = "AES-ECB"; @@ -885,12 +882,9 @@ const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, // This is called by the LLVM engine with string calling convention // WARNING: This function is for backward compatibility only. Decrypted data // may not be valid UTF-8. Use binary signatures for new code. 
-extern "C" GANDIVA_EXPORT -const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len) { +extern "C" GANDIVA_EXPORT const char* gdv_fn_aes_decrypt_ecb_legacy( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len) { // Delegate to the core implementation with ECB mode // This function is ECB-only, so we enforce the mode const char* mode = "AES-ECB"; @@ -909,52 +903,43 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, } // The 3- and 4-arg signatures exist to support optional IV and other arguments -extern "C" GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_3args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - int32_t* out_len) { - return gdv_fn_encrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, int32_t* out_len) { + return gdv_fn_encrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, mode_len, nullptr, 0, nullptr, 0, out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_3args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - int32_t* out_len) { - return gdv_fn_decrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, int32_t* out_len) { + return gdv_fn_decrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, 
mode_len, nullptr, 0, nullptr, 0, out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_4args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, int32_t* out_len) { - return gdv_fn_encrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len) { + return gdv_fn_encrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, mode_len, iv_data, iv_data_len, nullptr, 0, + out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_4args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, int32_t* out_len) { - return gdv_fn_decrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len) { + return gdv_fn_decrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, mode_len, iv_data, iv_data_len, nullptr, 0, + out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_5args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_5args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, int32_t* 
out_len) { + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, + int32_t* out_len) { try { // Allocate extra 16 bytes for AES block padding (PKCS7 padding can add // up to 16 bytes for a 128-bit block cipher) @@ -962,13 +947,12 @@ const char* gdv_fn_encrypt_dispatcher_5args( auto* output = reinterpret_cast( gdv_fn_context_arena_malloc(context, data_len + 16)); if (output == nullptr) { - throw std::runtime_error( - "Memory allocation failed for encryption output"); + throw std::runtime_error("Memory allocation failed for encryption output"); } int32_t cipher_len = EncryptModeDispatcher::encrypt( - data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, fifth_argument, fifth_argument_len, output); + data, data_len, key_data, key_data_len, mode, mode_len, iv_data, iv_data_len, + fifth_argument, fifth_argument_len, output); *out_len = cipher_len; return reinterpret_cast(output); @@ -979,23 +963,21 @@ const char* gdv_fn_encrypt_dispatcher_5args( } } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_5args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_5args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, int32_t* out_len) { + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, + int32_t* out_len) { try { - auto* output = reinterpret_cast( - gdv_fn_context_arena_malloc(context, data_len)); + auto* output = + reinterpret_cast(gdv_fn_context_arena_malloc(context, data_len)); if (output == nullptr) { - throw std::runtime_error( - "Memory allocation failed for decryption output"); + throw std::runtime_error("Memory 
allocation failed for decryption output"); } int32_t plaintext_len = EncryptModeDispatcher::decrypt( - data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, fifth_argument, fifth_argument_len, output); + data, data_len, key_data, key_data_len, mode, mode_len, iv_data, iv_data_len, + fifth_argument, fifth_argument_len, output); *out_len = plaintext_len; return reinterpret_cast(output); @@ -1254,8 +1236,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_encrypt_dispatcher_3args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_encrypt_dispatcher_3args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_encrypt_dispatcher_3args)); // gdv_fn_decrypt_dispatcher_3args (data, key, mode) @@ -1271,8 +1252,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_decrypt_dispatcher_3args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_decrypt_dispatcher_3args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_decrypt_dispatcher_3args)); // gdv_fn_encrypt_dispatcher_4args (data, key, mode, iv) @@ -1290,8 +1270,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_encrypt_dispatcher_4args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_encrypt_dispatcher_4args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_encrypt_dispatcher_4args)); // gdv_fn_decrypt_dispatcher_4args (data, key, mode, iv) @@ -1309,8 +1288,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_decrypt_dispatcher_4args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_decrypt_dispatcher_4args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_decrypt_dispatcher_4args)); // 
gdv_fn_encrypt_dispatcher_5args (data, key, mode, iv, @@ -1331,8 +1309,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_encrypt_dispatcher_5args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_encrypt_dispatcher_5args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_encrypt_dispatcher_5args)); // gdv_fn_decrypt_dispatcher_5args (data, key, mode, iv, @@ -1353,8 +1330,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_decrypt_dispatcher_5args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_decrypt_dispatcher_5args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_decrypt_dispatcher_5args)); // gdv_mask_first_n and gdv_mask_last_n @@ -1454,8 +1430,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_cast_intervalday_utf8_int32", - types->i64_type() /*return_type*/, args, + "gdv_fn_cast_intervalday_utf8_int32", types->i64_type() /*return_type*/, args, reinterpret_cast(gdv_fn_cast_intervalday_utf8_int32)); // gdv_fn_cast_intervalyear_utf8 @@ -1472,15 +1447,13 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type() /*return_type*/, args, reinterpret_cast(gdv_fn_cast_intervalyear_utf8)); -#define ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION( \ - LLVM_TYPE, DATA_TYPE) \ - args = {types->i64_type(), types->i8_ptr_type(), types->i32_ptr_type(), \ - types->i64_type(), types->LLVM_TYPE##_ptr_type(), \ - types->i32_type(), types->i32_ptr_type()}; \ - engine->AddGlobalMappingForFunc( \ - "gdv_fn_populate_list_" #DATA_TYPE "_vector", \ - types->i32_type() /*return_type*/, args, \ - reinterpret_cast(gdv_fn_populate_list_##DATA_TYPE##_vector)); +#define ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(LLVM_TYPE, DATA_TYPE) \ + args = {types->i64_type(), 
types->i8_ptr_type(), types->i32_ptr_type(), \ + types->i64_type(), types->LLVM_TYPE##_ptr_type(), types->i32_type(), \ + types->i32_ptr_type()}; \ + engine->AddGlobalMappingForFunc( \ + "gdv_fn_populate_list_" #DATA_TYPE "_vector", types->i32_type() /*return_type*/, \ + args, reinterpret_cast(gdv_fn_populate_list_##DATA_TYPE##_vector)); ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(i32, int32_t) ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(i64, int64_t) @@ -1504,15 +1477,16 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { reinterpret_cast(gdv_fn_cast_intervalyear_utf8_int32)); // gdv_fn_populate_list_varlen_vector - args = {types->i64_type(), // int64_t execution_context - types->i8_ptr_type(), // int8_t* data ptr - types->i32_ptr_type(), // int32_t* offsets ptr - types->i32_ptr_type(), // int32_t* child offsets ptr - types->i64_type(), // int64_t slot - types->i8_ptr_type(), // const char* entry_buf - types->i32_ptr_type(), // int32_t* entry child offsets ptr - types->i32_type(), // int32_t entry child offsets length - types->i32_ptr_type() // int32_t* entry child valid ptr + args = { + types->i64_type(), // int64_t execution_context + types->i8_ptr_type(), // int8_t* data ptr + types->i32_ptr_type(), // int32_t* offsets ptr + types->i32_ptr_type(), // int32_t* child offsets ptr + types->i64_type(), // int64_t slot + types->i8_ptr_type(), // const char* entry_buf + types->i32_ptr_type(), // int32_t* entry child offsets ptr + types->i32_type(), // int32_t entry child offsets length + types->i32_ptr_type() // int32_t* entry child valid ptr }; engine->AddGlobalMappingForFunc( diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 54480ac7f6f4..b77b2240abe9 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -192,62 +192,60 @@ double gdv_fn_castFLOAT8_varbinary(gdv_int64 context, const char* in, int32_t in // Legacy wrappers for string-based 
AES-ECB signatures GANDIVA_EXPORT const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len); + int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len); + int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len); // 3-argument dispatcher: (data, key, mode) GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, int32_t* out_len); +const char* gdv_fn_encrypt_dispatcher_3args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, int32_t* out_len); GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, int32_t* out_len); +const char* gdv_fn_decrypt_dispatcher_3args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, int32_t* out_len); // 4-argument dispatcher: (data, key, mode, iv) GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - int32_t* out_len); +const char* gdv_fn_encrypt_dispatcher_4args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len); 
GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - int32_t* out_len); +const char* gdv_fn_decrypt_dispatcher_4args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len); // 5-argument dispatcher: (data, key, mode, iv, fifth_argument) GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - const char* fifth_argument, int32_t fifth_argument_len, - int32_t* out_len); - -GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - const char* fifth_argument, int32_t fifth_argument_len, - int32_t* out_len); +const char* gdv_fn_encrypt_dispatcher_5args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, + const char* fifth_argument, + int32_t fifth_argument_len, int32_t* out_len); + +GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_5args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, + const char* fifth_argument, + int32_t fifth_argument_len, int32_t* out_len); GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc 
b/cpp/src/gandiva/gdv_function_stubs_test.cc index bfb34eeb31d8..171c60eef256 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -22,10 +22,10 @@ #include #include "arrow/util/logging.h" -#include "gandiva/execution_context.h" -#include "gandiva/encrypt_utils_ecb.h" #include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_ecb.h" #include "gandiva/encrypt_utils_gcm.h" +#include "gandiva/execution_context.h" namespace gandiva { @@ -1360,16 +1360,15 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt16) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { @@ -1384,17 +1383,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key24.c_str(), + key24_len, mode.c_str(), mode_len, &cipher_len); - const char* 
decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key24.c_str(), + key24_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { @@ -1409,17 +1407,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key32.c_str(), + key32_len, mode.c_str(), mode_len, &cipher_len); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key32.c_str(), + key32_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { @@ -1435,16 +1432,14 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { std::string cipher = "12345678abcdefgh12345678abcdefghb"; auto cipher_len = static_cast(cipher.length()); - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, - key33.c_str(), key33_len, mode.c_str(), - mode_len, &cipher_len); + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key33.c_str(), + key33_len, mode.c_str(), mode_len, &cipher_len); 
EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, - key33.c_str(), key33_len, mode.c_str(), - mode_len, &decrypted_len); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, key33.c_str(), + key33_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); @@ -1463,17 +1458,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeEcb) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); EXPECT_GT(cipher_len, 0); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { @@ -1489,23 +1483,19 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { int64_t ctx_ptr = reinterpret_cast(&ctx); // Test encrypt with invalid mode - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, - key16.c_str(), key16_len, - invalid_mode.c_str(), invalid_mode_len, - &cipher_len); - EXPECT_THAT(ctx.get_error(), - ::testing::HasSubstr("Unsupported encryption mode")); + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, 
key16.c_str(), + key16_len, invalid_mode.c_str(), invalid_mode_len, + &cipher_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported encryption mode")); ctx.Reset(); // Test decrypt with invalid mode std::string cipher = "12345678abcdefgh12345678abcdefgh"; auto cipher_len_val = static_cast(cipher.length()); - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, - key16.c_str(), key16_len, - invalid_mode.c_str(), invalid_mode_len, - &decrypted_len); - EXPECT_THAT(ctx.get_error(), - ::testing::HasSubstr("Unsupported decryption mode")); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, key16.c_str(), + key16_len, invalid_mode.c_str(), invalid_mode_len, + &decrypted_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported decryption mode")); ctx.Reset(); } @@ -1525,17 +1515,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, nullptr, 0, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, nullptr, 0, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { @@ -1555,17 +1544,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = 
gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for shorthand mode: AES-ECB (defaults to PKCS7) @@ -1581,18 +1569,17 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandEcb) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); EXPECT_GT(cipher_len, 0); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for explicit mode: AES-ECB-PKCS7 @@ -1608,18 +1595,17 @@ TEST(TestGdvFnStubs, 
TestAesEncryptDecryptExplicitEcbPkcs7) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); EXPECT_GT(cipher_len, 0); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for shorthand mode: AES-CBC (defaults to PKCS7) @@ -1638,17 +1624,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for explicit mode: AES-CBC-PKCS7 @@ -1667,17 +1652,16 @@ TEST(TestGdvFnStubs, 
TestAesEncryptDecryptExplicitCbcPkcs7) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for explicit mode: AES-CBC-NONE (no padding) @@ -1697,17 +1681,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } } // namespace gandiva diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index dca4a97079ce..a81bee57226f 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ 
b/cpp/src/gandiva/llvm_generator.cc @@ -98,7 +98,7 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode // Compile and inject into the process' memory the generated function. ARROW_RETURN_NOT_OK(engine_->FinalizeModule()); - + // setup the jit functions for each expression. for (auto& compiled_expr : compiled_exprs_) { auto fn_name = compiled_expr->GetFunctionName(mode); @@ -419,11 +419,12 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, output_value->data(), output_value->length()}); } else if (output_type_id == arrow::Type::STRUCT) { - auto slot_offset = builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); - builder->CreateStore(output_value->data(), slot_offset); + auto slot_offset = + builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); + builder->CreateStore(output_value->data(), slot_offset); } else if (output_type_id == arrow::Type::LIST) { auto output_list_internal_type = output->Type()->field(0)->type()->id(); - + if (arrow::is_binary_like(output_list_internal_type)) { auto output_list_value = std::dynamic_pointer_cast(output_value); llvm::Value* child_output_offset_ref = GetChildOffsetsReference( @@ -434,21 +435,25 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, child_output_offset_ref, loop_var, output_list_value->data(), output_list_value->child_offsets(), output_list_value->offsets_length()}); } else if (output_list_internal_type == arrow::Type::INT32) { - AddFunctionCall("gdv_fn_populate_list_int32_t_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_int32_t_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), 
output_value->length(), output_value->validity()}); } else if (output_list_internal_type == arrow::Type::INT64) { - AddFunctionCall("gdv_fn_populate_list_int64_t_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_int64_t_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length(), output_value->validity()}); } else if (output_list_internal_type == arrow::Type::FLOAT) { - AddFunctionCall("gdv_fn_populate_list_float_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_float_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length(), output_value->validity()}); } else if (output_list_internal_type == arrow::Type::DOUBLE) { - AddFunctionCall("gdv_fn_populate_list_double_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_double_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length(), output_value->validity()}); } else { return Status::NotImplemented("list internal type ", output->Type()->field(0)->type()->ToString(), @@ -551,7 +556,7 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr, /// /// 1. Do the intersection of input/local bitmaps to generate a temporary bitmap. /// 2. copy just the relevant bits from the temporary bitmap to the output bitmap. 
- + LocalBitMapsHolder bit_map_holder(eval_batch->num_records(), 1); uint8_t* temp_bitmap = bit_map_holder.GetLocalBitMap(0); accumulator.ComputeResult(temp_bitmap); @@ -680,7 +685,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { auto dt = dex.FieldType(); if (dt->id() == arrow::Type::LIST) { - type = types->IRType(dt->fields()[0]->type()->id() ); + type = types->IRType(dt->fields()[0]->type()->id()); } arrow::Type::type at32 = arrow::Type::INT32; @@ -713,15 +718,17 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { llvm::Value* data_list = builder->CreateGEP(type, slot_ref, slot_index); auto list_len_var = builder->CreateIntCast(list_len, types->i64_type(), true); - llvm::Value* vv_end = builder->CreateLoad(generator_->types()->i64_type(),validity_index_var_, "vv_end"); + llvm::Value* vv_end = + builder->CreateLoad(generator_->types()->i64_type(), validity_index_var_, "vv_end"); -llvm::Value* updated_validity_index_var = builder->CreateAdd( - vv_end, list_len_var, "validity_index_var+offset"); + llvm::Value* updated_validity_index_var = + builder->CreateAdd(vv_end, list_len_var, "validity_index_var+offset"); builder->CreateStore(updated_validity_index_var, validity_index_var_); llvm::Value* b_slot_index = builder->CreateAdd(loop_var_, GetSliceOffset(dex.ValidityIdx())); - llvm::Value* b_slot_ref = GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); + llvm::Value* b_slot_ref = + GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); llvm::Value* validity = builder->CreateGEP(type32, b_slot_ref, b_slot_index); std::string str3 = "validity:"; @@ -731,9 +738,10 @@ llvm::Value* updated_validity_index_var = builder->CreateAdd( } ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + " length %T", list_len); - ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + " updated_validity_index_var %T", + 
ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + + " updated_validity_index_var %T", updated_validity_index_var); - + result_.reset(new LValue(data_list, list_len, validity)); } @@ -804,7 +812,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { // => offset_start = offsets[loop_var] slot = builder->CreateGEP(type, offsets_slot_ref, offsets_slot_index); llvm::Value* offset_start = builder->CreateLoad(type, slot, "offset_start"); - + // => offset_end = offsets[loop_var + 1] llvm::Value* offsets_slot_index_next = builder->CreateAdd( offsets_slot_index, generator_->types()->i64_constant(1), "loop_var+1"); @@ -833,7 +841,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { llvm::Value* data_slot_ref = GetBufferReference(dex.DataIdx(), kBufferTypeData, dex.Field()); llvm::Value* data_value = builder->CreateGEP(type, data_slot_ref, child_offset_start); - + result_.reset(new ListLValue(data_value, child_offsets, list_data_length)); } @@ -1035,10 +1043,8 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { auto params = BuildParams(dex.get_holder_idx(), dex.args(), true, native_function->NeedsContext()); - - auto arrow_return_type = dex.func_descriptor()->return_type(); - + bool passLoopVars = false; for (auto& p : dex.func_descriptor()->params()) { if (p->id() == arrow::Type::LIST) { @@ -1046,10 +1052,10 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { break; } } - if (passLoopVars) - { + if (passLoopVars) { params.push_back(loop_var_); - auto valid_var = builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); + auto valid_var = + builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); params.push_back(valid_var); } @@ -1486,13 +1492,12 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, } if (arrow_return_type_id == arrow::Type::LIST) { - result_len_ptr = new 
llvm::AllocaInst(generator_->types()->i32_type(), 0, "result_len", entry_block_); params->push_back(result_len_ptr); has_arena_allocs_ = true; valid_ptr = new llvm::AllocaInst(generator_->types()->i32_ptr_type(), 0, - "valid_ptr", entry_block_); + "valid_ptr", entry_block_); params->push_back(valid_ptr); } @@ -1506,7 +1511,7 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, (result_len_ptr == nullptr) ? nullptr : builder->CreateLoad(result_len_ptr->getAllocatedType(), result_len_ptr); - auto validity = + auto validity = (valid_ptr == nullptr) ? nullptr : builder->CreateLoad(generator_->types()->i32_ptr_type(), valid_ptr); @@ -1550,7 +1555,6 @@ std::vector LLVMGenerator::Visitor::BuildParams( // append all the parameters corresponding to this LValue. result_ref.AppendFunctionParams(¶ms); - // build validity. if (with_validity) { llvm::Value* validity_expr = BuildCombinedValidity(pair->validity_exprs()); diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index 98e40667ece7..269c021f1344 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -46,7 +46,9 @@ class GANDIVA_EXPORT LLVMTypes { llvm::Type* i128_type() { return llvm::Type::getInt128Ty(context_); } - llvm::VectorType* list_type() { return llvm::ScalableVectorType::get(i8_type(), (unsigned int)0); } + llvm::VectorType* list_type() { + return llvm::ScalableVectorType::get(i8_type(), (unsigned int)0); + } llvm::StructType* i128_split_type() { // struct with high/low bits (see decimal_ops.cc:DecimalSplit) @@ -95,9 +97,7 @@ class GANDIVA_EXPORT LLVMTypes { return llvm::ConstantFP::get(float_type(), val); } - llvm::LLVMContext* get_context() { - return &context_; - } + llvm::LLVMContext* get_context() { return &context_; } llvm::Constant* double_constant(double val) { return llvm::ConstantFP::get(double_type(), val); @@ -121,7 +121,7 @@ class GANDIVA_EXPORT LLVMTypes { // offsets buffer is to separate data into list // not support 
nested list if (data_type->id() == arrow::Type::LIST) { - //Nested lists aren't supported yet. + // Nested lists aren't supported yet. if (data_type->field(0)->type()->id() == arrow::Type::LIST) { return NULL; } diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h index 04862dc9d18c..bc97b6b4073c 100644 --- a/cpp/src/gandiva/lvalue.h +++ b/cpp/src/gandiva/lvalue.h @@ -53,7 +53,7 @@ class GANDIVA_EXPORT LValue { virtual std::string to_string() { std::string s = "Base LValue"; - + std::string str1 = "data:"; if (data_) { llvm::raw_string_ostream output1(str1); @@ -107,8 +107,7 @@ class GANDIVA_EXPORT ListLValue : public LValue { llvm::Value* validity = NULLPTR) : LValue(data, NULLPTR, validity), child_offsets_(child_offsets), - offsets_length_(offsets_length) { - } + offsets_length_(offsets_length) {} llvm::Value* child_offsets() { return child_offsets_; } @@ -121,7 +120,7 @@ class GANDIVA_EXPORT ListLValue : public LValue { params->push_back(validity_); } - virtual std::string to_string() override { + std::string to_string() override { std::string s = "List LValue"; s += " " + LValue::to_string(); diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index 1277ccff3399..c0a914c47617 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -19,7 +19,6 @@ #include - #include "gandiva/array_ops.h" #include "gandiva/gdv_function_stubs.h" diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index 64c283e9fe03..0979abc5f7cb 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -147,7 +147,7 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, ++idx; } ARROW_RETURN_NOT_OK( - llvm_generator_->Execute(batch, selection_vector, output_data_vecs)); + llvm_generator_->Execute(batch, selection_vector, output_data_vecs)); return Status::OK(); } @@ -197,15 +197,20 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, * Otherwise, child 
data offsets buffer length is data length + 1 * and offset data is int32_t, need use buffer->size()/4 - 1 */ - child_data_size = child_data->buffers[child_data_buffer_index]->size() / int_data_size - 1; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / int_data_size - 1; } else if (child_data->type->id() == arrow::Type::INT32) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / int_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / int_data_size; } else if (child_data->type->id() == arrow::Type::INT64) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / double_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / double_data_size; } else if (child_data->type->id() == arrow::Type::FLOAT) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / int_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / int_data_size; } else if (child_data->type->id() == arrow::Type::DOUBLE) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / double_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / double_data_size; } auto new_child_data = arrow::ArrayData::Make( child_data->type, child_data_size, child_data->buffers, child_data->offset); @@ -278,14 +283,16 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records, } buffers.push_back(std::move(data_buffer)); - ARROW_ASSIGN_OR_RAISE(auto data_valid_buffer, arrow::AllocateResizableBuffer(data_len, pool)); + ARROW_ASSIGN_OR_RAISE(auto data_valid_buffer, + arrow::AllocateResizableBuffer(data_len, pool)); if (type->id() == arrow::Type::LIST) { auto internal_type = type->field(0)->type(); ArrayDataPtr child_data; if (arrow::is_primitive(internal_type->id())) { - child_data = arrow::ArrayData::Make(internal_type, 0 /*initialize length*/, - 
{std::move(data_valid_buffer), std::move(buffers[2])}, 0); + child_data = arrow::ArrayData::Make( + internal_type, 0 /*initialize length*/, + {std::move(data_valid_buffer), std::move(buffers[2])}, 0); } if (arrow::is_binary_like(internal_type->id())) { child_data = arrow::ArrayData::Make( @@ -349,8 +356,7 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data, Status::Invalid("Data buffer too small for ", field.name())); } else if (type_id == arrow::Type::LIST) { return Status::OK(); - } - else { + } else { return Status::Invalid("Unsupported output data type " + field.type()->ToString()); } diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index da81e79e535c..c92394ead570 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -133,7 +133,6 @@ class GANDIVA_EXPORT Projector { Projector(std::unique_ptr llvm_generator, SchemaPtr schema, const FieldVector& output_fields, std::shared_ptr); - /// Validate that the ArrayData has sufficient capacity to accommodate 'num_records'. Status ValidateArrayDataCapacity(const arrow::ArrayData& array_data, const arrow::Field& field, int64_t num_records) const; diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt index 09428a870567..48aec99a114b 100644 --- a/cpp/src/gandiva/tests/CMakeLists.txt +++ b/cpp/src/gandiva/tests/CMakeLists.txt @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. 
- add_gandiva_test(projector-test SOURCES binary_test.cc diff --git a/cpp/src/gandiva/tests/list_test.cc b/cpp/src/gandiva/tests/list_test.cc index abc7b5d7091b..a76428bea740 100644 --- a/cpp/src/gandiva/tests/list_test.cc +++ b/cpp/src/gandiva/tests/list_test.cc @@ -83,10 +83,10 @@ void _build_list_array(const vector& values, const vector& l template void _build_list_array2(const vector& values, const vector& length, - const vector& validity, const vector& innerValidity, arrow::MemoryPool* pool, - ArrayPtr* array) { - return _build_list_array(values, length, validity, pool, array); - } + const vector& validity, const vector& innerValidity, + arrow::MemoryPool* pool, ArrayPtr* array) { + return _build_list_array(values, length, validity, pool, array); +} /* * expression: @@ -136,7 +136,7 @@ void _test_list_type_field_alias(DataTypePtr type, ArrayPtr array, TEST_F(TestList, TestArrayRemove) { // schema for input fields auto field_b = field("b", int32()); - + auto field_a = field("a", list(int32())); auto schema = arrow::schema({field_a, field_b}); @@ -147,7 +147,7 @@ TEST_F(TestList, TestArrayRemove) { int num_records = 2; auto array_b = MakeArrowArrayInt32({42, 42}, {true, true}); - + ArrayPtr array_a; _build_list_array2( {10, 42, 30, 42, 70, 80}, @@ -202,7 +202,7 @@ auto bitmap_buffer2 = arrow::AllocateBuffer(size, pool_); auto offsets_buffer2 = arrow::AllocateBuffer(offsets_len, pool_); buffers2.push_back(*std::move(offsets_buffer2)); std::shared_ptr dt2 = std::make_shared(); - + auto array_data_child = arrow::ArrayData::Make(dt2, num_records2, buffers2, 0, 0); array_data_child->buffers = std::move(buffers2); @@ -214,7 +214,7 @@ auto array_data = arrow::ArrayData::Make(dt, num_records2, buffers, kids, 0, 0); array_data->buffers = std::move(buffers); outputs2.push_back(array_data); - + status = projector->Evaluate(*(in_batch.get()), outputs2); EXPECT_TRUE(status.ok()) << status.message(); arrow::ArrayData ad = *outputs2.at(0); @@ -243,7 +243,7 @@ for (auto& 
array_data : outputs2) { array_data = arrow::ArrayData::Make(array_data->type, array_data->length, array_data->buffers, {new_child_data}, array_data->null_count, array_data->offset); - + auto newArray = arrow::MakeArray(array_data); //arrow::ArraySpan sp(newArray); @@ -313,9 +313,9 @@ TEST_F(TestList, TestListInt32LiteralContains) { auto node2 = TreeExprBuilder::MakeLiteral(42); field_nodes.push_back(node2); - - auto func_node = TreeExprBuilder::MakeFunction("array_contains", field_nodes, res->type()); - auto expr = TreeExprBuilder::MakeExpression(func_node, res); + + auto func_node = TreeExprBuilder::MakeFunction("array_contains", field_nodes, +res->type()); auto expr = TreeExprBuilder::MakeExpression(func_node, res); //////// // Build a projector for the expressions. diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index e0c2e5164a5f..3fbe80d4cc34 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -2823,11 +2823,10 @@ TEST_F(TestProjector, TestAesEncryptDecrypt) { const char* key_24_bytes = "12345678abcdefgh12345678"; const char* key_32_bytes = "12345678abcdefgh12345678abcdefgh"; - auto array_data = MakeArrowArrayUtf8({"abc", "some words", "to be encrypted"}, - {true, true, true}); + auto array_data = + MakeArrowArrayUtf8({"abc", "some words", "to be encrypted"}, {true, true, true}); auto array_key = - MakeArrowArrayUtf8({key_16_bytes, key_24_bytes, key_32_bytes}, - {true, true, true}); + MakeArrowArrayUtf8({key_16_bytes, key_24_bytes, key_32_bytes}, {true, true, true}); auto array_holder_en = MakeArrowArrayUtf8({"", "", ""}, {true, true, true}); diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 736c13d4522a..2134841a5387 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -76,8 +76,8 @@ ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.22.0 
ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a042c1aae0c2feee905acaa2a2dc9bf ARROW_GRPC_BUILD_VERSION=v1.46.3 ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 -ARROW_GTEST_BUILD_VERSION=1.11.0 -ARROW_GTEST_BUILD_SHA256_CHECKSUM=b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5 +ARROW_GTEST_BUILD_VERSION=1.17.0 +ARROW_GTEST_BUILD_SHA256_CHECKSUM=65fab701d9829d38cb77c14acdc431d2108bfdbf8979e40eb8ae567edf10b27c ARROW_JEMALLOC_BUILD_VERSION=5.3.0 ARROW_JEMALLOC_BUILD_SHA256_CHECKSUM=2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa ARROW_LZ4_BUILD_VERSION=v1.10.0 @@ -151,7 +151,7 @@ DEPENDENCIES=( "ARROW_GLOG_URL glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" "ARROW_GOOGLE_CLOUD_CPP_URL google-cloud-cpp-${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz https://github.com/googleapis/google-cloud-cpp/archive/${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz" "ARROW_GRPC_URL grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz" - "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/releases/download/v${ARROW_GTEST_BUILD_VERSION}/googletest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "ARROW_JEMALLOC_URL jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2 https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" "ARROW_LZ4_URL lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz" "ARROW_MIMALLOC_URL mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" 
diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index ff1834e63b91..4f3089ac3e5f 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -30,16 +30,17 @@ jobs: ARCH: {{ '${{ matrix.platform.archery_arch }}' }} ARCH_ALIAS: {{ '${{ matrix.platform.archery_arch_alias }}' }} ARCH_SHORT: {{ '${{ matrix.platform.archery_arch_short }}' }} + ARCHERY_USE_DOCKER_CLI: {{ "${{matrix.platform.archery_use_docker_cli || '1'}}" }} strategy: fail-fast: false matrix: platform: - - runs_on: ["ubuntu-latest"] + - runs_on: ubuntu-latest arch: "x86_64" archery_arch: "amd64" archery_arch_alias: "x86_64" archery_arch_short: "amd64" - - runs_on: ["buildjet-8vcpu-ubuntu-2204-arm"] + - runs_on: ubuntu-24.04-arm arch: "aarch_64" archery_arch: "arm64v8" archery_arch_alias: "aarch64" @@ -72,7 +73,7 @@ jobs: - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux-{{ arch }}.tar.gz arrow/java-dist/ - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: ubuntu-shared-lib-{{ arch }} path: arrow-shared-libs-linux-{{ arch }}.tar.gz @@ -91,33 +92,55 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: ["macos-15-intel"], arch: "x86_64"} + - { runs_on: macos-15, arch: "aarch_64" } env: - MACOSX_DEPLOYMENT_TARGET: "12.0" + MACOSX_DEPLOYMENT_TARGET: "15.0" steps: {{ macros.github_checkout_arrow()|indent }} - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: cache: 'pip' python-version: 3.12 - name: Install Archery shell: bash run: pip install -e arrow/dev/archery[all] + - name: Checkout vcpkg + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: Microsoft/vcpkg + path: arrow/vcpkg + fetch-depth: 0 + - name: Install vcpkg + run: | + cd arrow/vcpkg + ./bootstrap-vcpkg.sh 
+ echo "VCPKG_ROOT_LOCAL=${PWD}" >> ${GITHUB_ENV} + echo "${PWD}" >> ${GITHUB_PATH} + - name: Clean up disk space + run: | + echo "=== Free disk space before cleanup ===" + df -h / + + echo "" + echo "=== Removing Xcode simulators ===" + sudo rm -rf /Library/Developer/CoreSimulator/Caches || : + echo "Removed /Library/Developer/CoreSimulator/Caches" + + echo "" + echo "=== Removing user simulator data ===" + rm -rf ~/Library/Developer/CoreSimulator || : + echo "Removed ~/Library/Developer/CoreSimulator" + + echo "" + echo "=== Free disk space after cleanup ===" + df -h / - name: Install dependencies run: | - # We want to use llvm@14 to avoid shared z3 - # dependency. llvm@14 doesn't depend on z3 and llvm depends - # on z3. And Homebrew's z3 provides only shared library. It - # doesn't provides static z3 because z3's CMake doesn't accept - # building both shared and static libraries at once. - # See also: Z3_BUILD_LIBZ3_SHARED in - # https://github.com/Z3Prover/z3/blob/master/README-CMake.md - # - # If llvm is installed, Apache Arrow C++ uses llvm rather than - # llvm@14 because llvm is newer than llvm@14. - brew uninstall llvm || : + echo "=== Free disk space at start of dependency installation ===" + df -h / + echo "" # Ensure updating python@XXX with the "--overwrite" option. # If python@XXX is updated without "--overwrite", it causes # a conflict error. Because Python 3 installed not by @@ -125,64 +148,129 @@ jobs: # Homebrew's python@XXX is updated without "--overwrite", it # tries to replace /usr/local/bin/2to3 and so on and causes # a conflict error. - # brew update + brew update for python_package in $(brew list | grep python@); do brew install --overwrite ${python_package} done brew install --overwrite python + if [ "$(uname -m)" = "arm64" ]; then + # pkg-config formula is deprecated but it's still installed + # in GitHub Actions runner now. We can remove this once + # pkg-config formula is removed from GitHub Actions runner. 
+ brew uninstall pkg-config || : + fi + + # Install basic build tools via brew (vcpkg needs these) + brew install cmake ninja pkg-config brew bundle --file=arrow/cpp/Brewfile + + # Clean up any existing LLVM installations in favor of vcpkg. + # Need to uninstall all versioned LLVM packages (llvm@18, llvm@17, etc.) + for llvm_pkg in $(brew list | grep -E '^llvm(@[0-9]+)?$'); do + brew uninstall "${llvm_pkg}" || : + done + # We want to link aws-sdk-cpp statically but Homebrew's # aws-sdk-cpp provides only shared library. If we have # Homebrew's aws-sdk-cpp, our build mix Homebrew's # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. - brew uninstall aws-sdk-cpp + brew uninstall aws-sdk-cpp || : # We want to use bundled RE2 for static linking. If # Homebrew's RE2 is installed, its header file may be used. # We uninstall Homebrew's RE2 to ensure using bundled RE2. brew uninstall grpc || : # gRPC depends on RE2 - brew uninstall re2 + brew uninstall re2 || : # We want to use bundled Protobuf for static linking. If # Homebrew's Protobuf is installed, its library file may be # used on test We uninstall Homebrew's Protobuf to ensure using # bundled Protobuf. 
- brew uninstall protobuf - # fix cmake and boost versionsAdd commentMore actions - brew uninstall -f boost || true - brew uninstall -f cmake || true - mkdir -p homebrew-custom/Formula - curl -o homebrew-custom/Formula/cmake.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/f68532bfe5cb87474093df8a839c3818c6aa44dd/Formula/c/cmake.rb - curl -o homebrew-custom/Formula/boost.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/23f9c56c5075dd56b4471e2c93f89f6400b49ddd/Formula/b/boost.rb - brew tap-new local/homebrew-custom - cp ./homebrew-custom/Formula/*.rb "$(brew --repo local/homebrew-custom)/Formula/" - brew install -v local/homebrew-custom/cmake - brew install -v local/homebrew-custom/boost - brew pin cmake - brew pin boost - # + brew uninstall protobuf || : + + echo "" + echo "=== Free disk space before LLVM build ===" + df -h / + + echo "" + # Use vcpkg to install LLVM. + # Create overlay directory if it doesn't exist + mkdir -p arrow/ci/vcpkg/overlay/llvm + vcpkg install \ + --clean-after-build \ + --vcpkg-root=${VCPKG_ROOT_LOCAL} \ + --x-install-root=${VCPKG_ROOT_LOCAL}/installed \ + --x-manifest-root=arrow/ci/vcpkg \ + --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=gandiva + echo "" + echo "=== Free disk space after LLVM build ===" + df -h / + echo "" brew bundle --file=arrow/java/Brewfile - name: Build C++ libraries env: ARROW_USE_CCACHE: "ON" run: | set -e + echo "=== Free disk space at start of build ===" + df -h / + + echo "" # make brew Java available to CMake export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home arrow/ci/scripts/java_jni_macos_build.sh \ $GITHUB_WORKSPACE/arrow \ $GITHUB_WORKSPACE/arrow/cpp-build \ $GITHUB_WORKSPACE/arrow/java-dist + + echo "" + echo "=== Free disk space at end of build ===" + df -h / - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-macos-{{ arch }}.tar.gz arrow/java-dist/ - name: Upload artifacts - uses: 
actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: macos-shared-lib-{{ arch }} path: arrow-shared-libs-macos-{{ arch }}.tar.gz + build-cpp-windows: + name: Build C++ libraries Windows + runs-on: windows-2019 + steps: + {{ macros.github_checkout_arrow()|indent }} + - name: Set up Java + uses: actions/setup-java@v4 + with: + java-version: '11' + distribution: 'temurin' + - name: Download Timezone Database + shell: bash + run: arrow/ci/scripts/download_tz_database.sh + - name: Install sccache + shell: bash + run: arrow/ci/scripts/install_sccache.sh pc-windows-msvc $(pwd)/sccache + - name: Build C++ libraries + shell: cmd + env: + {{ macros.github_set_sccache_envvars()|indent(8) }} + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + REM For ORC + set TZDIR=/c/msys64/usr/share/zoneinfo + bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist" + - name: Compress into single artifact to keep directory structure + shell: bash + run: tar -cvzf arrow-shared-libs-windows.tar.gz arrow/java-dist/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: windows-shared-lib + path: arrow-shared-libs-windows.tar.gz + package-jars: name: Build jar files runs-on: {{ '${{ matrix.platform.runs_on }}' }} @@ -190,14 +278,15 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: ["macos-15-intel"], arch: "x86_64"} + - { runs_on: macos-14-large, arch: "x86_64"} needs: - build-cpp-ubuntu - build-cpp-macos + - build-cpp-windows steps: {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Download Libraries - uses: actions/download-artifact@v4 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: path: artifacts - name: Decompress artifacts @@ -206,6 +295,8 @@ jobs: tar -xvzf 
arrow-shared-libs-linux-x86_64.tar.gz tar -xvzf arrow-shared-libs-linux-aarch_64.tar.gz tar -xvzf arrow-shared-libs-macos-x86_64.tar.gz + tar -xvzf arrow-shared-libs-macos-aarch_64.tar.gz + tar -xvzf arrow-shared-libs-windows.tar.gz - name: Test that shared libraries exist run: | set -x @@ -224,6 +315,15 @@ jobs: test -f arrow/java-dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib test -f arrow/java-dist/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib test -f arrow/java-dist/gandiva_jni/x86_64/libgandiva_jni.dylib + + test -f arrow/java-dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib + test -f arrow/java-dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib + test -f arrow/java-dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib + test -f arrow/java-dist/gandiva_jni/aarch_64/libgandiva_jni.dylib + + test -f arrow/java-dist/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll + test -f arrow/java-dist/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll + test -f arrow/java-dist/arrow_orc_jni/x86_64/arrow_orc_jni.dll - name: Build bundled jar env: MAVEN_ARGS: >- @@ -233,6 +333,7 @@ jobs: pushd arrow/java mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f bom + mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f maven popd arrow/ci/scripts/java_full_build.sh \ $GITHUB_WORKSPACE/arrow \ @@ -242,3 +343,4 @@ jobs: "arrow/java-dist/*.pom", "arrow/java-dist/*.xml", "arrow/java-dist/*.zip"])|indent }} +