From 26818682dcd3d0e6ff9b3ce3d1144fa96c2a1aaa Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 21:04:10 -0600 Subject: [PATCH 01/18] Upgrade Java JAR build workflow with modern improvements - Update GitHub Actions to v4+ with SHA pinning - Add ARM64 support for Ubuntu and macOS - Use vcpkg for LLVM to remove z3 runtime dependency - Add disk space cleanup for macOS runners - Add disk space monitoring throughout build - Enhance testing for ARM64 libraries Ref: DX-103340 --- dev/tasks/java-jars/github.yml | 176 ++++++++++++++++++++++++++------- 1 file changed, 139 insertions(+), 37 deletions(-) diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index ff1834e63b91..e1fa8506af00 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -30,16 +30,17 @@ jobs: ARCH: {{ '${{ matrix.platform.archery_arch }}' }} ARCH_ALIAS: {{ '${{ matrix.platform.archery_arch_alias }}' }} ARCH_SHORT: {{ '${{ matrix.platform.archery_arch_short }}' }} + ARCHERY_USE_DOCKER_CLI: {{ "${{matrix.platform.archery_use_docker_cli || '1'}}" }} strategy: fail-fast: false matrix: platform: - - runs_on: ["ubuntu-latest"] + - runs_on: ubuntu-latest arch: "x86_64" archery_arch: "amd64" archery_arch_alias: "x86_64" archery_arch_short: "amd64" - - runs_on: ["buildjet-8vcpu-ubuntu-2204-arm"] + - runs_on: ubuntu-24.04-arm arch: "aarch_64" archery_arch: "arm64v8" archery_arch_alias: "aarch64" @@ -72,7 +73,7 @@ jobs: - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux-{{ arch }}.tar.gz arrow/java-dist/ - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: ubuntu-shared-lib-{{ arch }} path: arrow-shared-libs-linux-{{ arch }}.tar.gz @@ -91,33 +92,55 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: ["macos-15-intel"], arch: "x86_64"} + - { runs_on: macos-15, arch: "aarch_64" } env: - 
MACOSX_DEPLOYMENT_TARGET: "12.0" + MACOSX_DEPLOYMENT_TARGET: "15.0" steps: {{ macros.github_checkout_arrow()|indent }} - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: cache: 'pip' python-version: 3.12 - name: Install Archery shell: bash run: pip install -e arrow/dev/archery[all] + - name: Checkout vcpkg + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: Microsoft/vcpkg + path: arrow/vcpkg + fetch-depth: 0 + - name: Install vcpkg + run: | + cd arrow/vcpkg + ./bootstrap-vcpkg.sh + echo "VCPKG_ROOT_LOCAL=${PWD}" >> ${GITHUB_ENV} + echo "${PWD}" >> ${GITHUB_PATH} + - name: Clean up disk space + run: | + echo "=== Free disk space before cleanup ===" + df -h / + + echo "" + echo "=== Removing Xcode simulators ===" + sudo rm -rf /Library/Developer/CoreSimulator/Caches || : + echo "Removed /Library/Developer/CoreSimulator/Caches" + + echo "" + echo "=== Removing user simulator data ===" + rm -rf ~/Library/Developer/CoreSimulator || : + echo "Removed ~/Library/Developer/CoreSimulator" + + echo "" + echo "=== Free disk space after cleanup ===" + df -h / - name: Install dependencies run: | - # We want to use llvm@14 to avoid shared z3 - # dependency. llvm@14 doesn't depend on z3 and llvm depends - # on z3. And Homebrew's z3 provides only shared library. It - # doesn't provides static z3 because z3's CMake doesn't accept - # building both shared and static libraries at once. - # See also: Z3_BUILD_LIBZ3_SHARED in - # https://github.com/Z3Prover/z3/blob/master/README-CMake.md - # - # If llvm is installed, Apache Arrow C++ uses llvm rather than - # llvm@14 because llvm is newer than llvm@14. - brew uninstall llvm || : + echo "=== Free disk space at start of dependency installation ===" + df -h / + echo "" # Ensure updating python@XXX with the "--overwrite" option. # If python@XXX is updated without "--overwrite", it causes # a conflict error. 
Because Python 3 installed not by @@ -125,64 +148,129 @@ jobs: # Homebrew's python@XXX is updated without "--overwrite", it # tries to replace /usr/local/bin/2to3 and so on and causes # a conflict error. - # brew update + brew update for python_package in $(brew list | grep python@); do brew install --overwrite ${python_package} done brew install --overwrite python + if [ "$(uname -m)" = "arm64" ]; then + # pkg-config formula is deprecated but it's still installed + # in GitHub Actions runner now. We can remove this once + # pkg-config formula is removed from GitHub Actions runner. + brew uninstall pkg-config || : + fi + + # Install basic build tools via brew (vcpkg needs these) + brew install cmake ninja pkg-config brew bundle --file=arrow/cpp/Brewfile + + # Clean up any existing LLVM installations in favor of vcpkg. + # Need to uninstall all versioned LLVM packages (llvm@18, llvm@17, etc.) + for llvm_pkg in $(brew list | grep -E '^llvm(@[0-9]+)?$'); do + brew uninstall "${llvm_pkg}" || : + done + # We want to link aws-sdk-cpp statically but Homebrew's # aws-sdk-cpp provides only shared library. If we have # Homebrew's aws-sdk-cpp, our build mix Homebrew's # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. - brew uninstall aws-sdk-cpp + brew uninstall aws-sdk-cpp || : # We want to use bundled RE2 for static linking. If # Homebrew's RE2 is installed, its header file may be used. # We uninstall Homebrew's RE2 to ensure using bundled RE2. brew uninstall grpc || : # gRPC depends on RE2 - brew uninstall re2 + brew uninstall re2 || : # We want to use bundled Protobuf for static linking. If # Homebrew's Protobuf is installed, its library file may be # used on test We uninstall Homebrew's Protobuf to ensure using # bundled Protobuf. 
- brew uninstall protobuf - # fix cmake and boost versionsAdd commentMore actions - brew uninstall -f boost || true - brew uninstall -f cmake || true - mkdir -p homebrew-custom/Formula - curl -o homebrew-custom/Formula/cmake.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/f68532bfe5cb87474093df8a839c3818c6aa44dd/Formula/c/cmake.rb - curl -o homebrew-custom/Formula/boost.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/23f9c56c5075dd56b4471e2c93f89f6400b49ddd/Formula/b/boost.rb - brew tap-new local/homebrew-custom - cp ./homebrew-custom/Formula/*.rb "$(brew --repo local/homebrew-custom)/Formula/" - brew install -v local/homebrew-custom/cmake - brew install -v local/homebrew-custom/boost - brew pin cmake - brew pin boost - # + brew uninstall protobuf || : + + echo "" + echo "=== Free disk space before LLVM build ===" + df -h / + + echo "" + # Use vcpkg to install LLVM. + # Create overlay directory if it doesn't exist + mkdir -p arrow/ci/vcpkg/overlay/llvm + vcpkg install \ + --clean-after-build \ + --vcpkg-root=${VCPKG_ROOT_LOCAL} \ + --x-install-root=${VCPKG_ROOT_LOCAL}/installed \ + --x-manifest-root=arrow/ci/vcpkg \ + --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=gandiva + echo "" + echo "=== Free disk space after LLVM build ===" + df -h / + echo "" brew bundle --file=arrow/java/Brewfile - name: Build C++ libraries env: ARROW_USE_CCACHE: "ON" run: | set -e + echo "=== Free disk space at start of build ===" + df -h / + + echo "" # make brew Java available to CMake export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home arrow/ci/scripts/java_jni_macos_build.sh \ $GITHUB_WORKSPACE/arrow \ $GITHUB_WORKSPACE/arrow/cpp-build \ $GITHUB_WORKSPACE/arrow/java-dist + + echo "" + echo "=== Free disk space at end of build ===" + df -h / - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-macos-{{ arch }}.tar.gz arrow/java-dist/ - name: Upload artifacts - uses: 
actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: macos-shared-lib-{{ arch }} path: arrow-shared-libs-macos-{{ arch }}.tar.gz + build-cpp-windows: + name: Build C++ libraries Windows + runs-on: windows-2019 + steps: + {{ macros.github_checkout_arrow()|indent }} + - name: Set up Java + uses: actions/setup-java@v4 + with: + java-version: '11' + distribution: 'temurin' + - name: Download Timezone Database + shell: bash + run: arrow/ci/scripts/download_tz_database.sh + - name: Install sccache + shell: bash + run: arrow/ci/scripts/install_sccache.sh pc-windows-msvc $(pwd)/sccache + - name: Build C++ libraries + shell: cmd + env: + {{ macros.github_set_sccache_envvars()|indent(8) }} + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + REM For ORC + set TZDIR=/c/msys64/usr/share/zoneinfo + bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist" + - name: Compress into single artifact to keep directory structure + shell: bash + run: tar -cvzf arrow-shared-libs-windows.tar.gz arrow/java-dist/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: windows-shared-lib + path: arrow-shared-libs-windows.tar.gz + package-jars: name: Build jar files runs-on: {{ '${{ matrix.platform.runs_on }}' }} @@ -190,14 +278,15 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: ["macos-15-intel"], arch: "x86_64"} + - { runs_on: macos-13, arch: "x86_64"} needs: - build-cpp-ubuntu - build-cpp-macos + - build-cpp-windows steps: {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - name: Download Libraries - uses: actions/download-artifact@v4 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: path: artifacts - name: Decompress artifacts @@ -206,6 +295,8 @@ jobs: tar -xvzf 
arrow-shared-libs-linux-x86_64.tar.gz tar -xvzf arrow-shared-libs-linux-aarch_64.tar.gz tar -xvzf arrow-shared-libs-macos-x86_64.tar.gz + tar -xvzf arrow-shared-libs-macos-aarch_64.tar.gz + tar -xvzf arrow-shared-libs-windows.tar.gz - name: Test that shared libraries exist run: | set -x @@ -224,6 +315,15 @@ jobs: test -f arrow/java-dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib test -f arrow/java-dist/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib test -f arrow/java-dist/gandiva_jni/x86_64/libgandiva_jni.dylib + + test -f arrow/java-dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib + test -f arrow/java-dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib + test -f arrow/java-dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib + test -f arrow/java-dist/gandiva_jni/aarch_64/libgandiva_jni.dylib + + test -f arrow/java-dist/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll + test -f arrow/java-dist/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll + test -f arrow/java-dist/arrow_orc_jni/x86_64/arrow_orc_jni.dll - name: Build bundled jar env: MAVEN_ARGS: >- @@ -233,6 +333,7 @@ jobs: pushd arrow/java mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f bom + mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f maven popd arrow/ci/scripts/java_full_build.sh \ $GITHUB_WORKSPACE/arrow \ @@ -242,3 +343,4 @@ jobs: "arrow/java-dist/*.pom", "arrow/java-dist/*.xml", "arrow/java-dist/*.zip"])|indent }} + From 4a33471596588f698df3e8bcbc958fede4501f36 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 23:46:15 -0600 Subject: [PATCH 02/18] Fix C++ code formatting (clang-format) --- cpp/src/arrow/buffer.h | 19 +- cpp/src/gandiva/annotator.cc | 39 +- cpp/src/gandiva/array_ops.cc | 437 ++++++++++--------- cpp/src/gandiva/array_ops.h | 69 +-- cpp/src/gandiva/array_ops_test.cc | 7 +- cpp/src/gandiva/encrypt_mode_dispatcher.cc | 69 ++- 
cpp/src/gandiva/encrypt_mode_dispatcher.h | 21 +- cpp/src/gandiva/encrypt_utils_cbc.cc | 15 +- cpp/src/gandiva/encrypt_utils_cbc.h | 11 +- cpp/src/gandiva/encrypt_utils_cbc_test.cc | 37 +- cpp/src/gandiva/encrypt_utils_common.cc | 3 +- cpp/src/gandiva/encrypt_utils_common.h | 7 +- cpp/src/gandiva/encrypt_utils_common_test.cc | 3 +- cpp/src/gandiva/encrypt_utils_ecb.cc | 5 +- cpp/src/gandiva/encrypt_utils_ecb.h | 3 +- cpp/src/gandiva/encrypt_utils_ecb_test.cc | 58 ++- cpp/src/gandiva/encrypt_utils_gcm.cc | 19 +- cpp/src/gandiva/encrypt_utils_gcm.h | 17 +- cpp/src/gandiva/encrypt_utils_gcm_test.cc | 36 +- cpp/src/gandiva/engine.cc | 2 +- cpp/src/gandiva/field_descriptor.h | 10 +- cpp/src/gandiva/function_registry.cc | 4 +- cpp/src/gandiva/function_registry_string.cc | 18 +- cpp/src/gandiva/gdv_function_stubs.cc | 210 ++++----- cpp/src/gandiva/gdv_function_stubs.h | 78 ++-- cpp/src/gandiva/gdv_function_stubs_test.cc | 179 ++++---- cpp/src/gandiva/llvm_generator.cc | 76 ++-- cpp/src/gandiva/llvm_types.h | 10 +- cpp/src/gandiva/lvalue.h | 5 +- cpp/src/gandiva/precompiled/types.h | 1 - cpp/src/gandiva/projector.cc | 28 +- cpp/src/gandiva/projector.h | 1 - cpp/src/gandiva/tests/list_test.cc | 24 +- cpp/src/gandiva/tests/projector_test.cc | 7 +- 34 files changed, 747 insertions(+), 781 deletions(-) diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 4e47fd293251..0445c37da0a4 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -522,10 +522,10 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { } public: - uint8_t* offsetBuffer; - int64_t offsetCapacity; - uint8_t* validityBuffer; - uint8_t* outerValidityBuffer; + uint8_t* offsetBuffer; + int64_t offsetCapacity; + uint8_t* validityBuffer; + uint8_t* outerValidityBuffer; protected: ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) { @@ -533,15 +533,14 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer { offsetCapacity = 0; validityBuffer = nullptr; 
outerValidityBuffer = nullptr; - } ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr mm) : MutableBuffer(data, size, std::move(mm)) { - offsetBuffer = nullptr; - offsetCapacity = 0; - validityBuffer = nullptr; - outerValidityBuffer = nullptr; - } + offsetBuffer = nullptr; + offsetCapacity = 0; + validityBuffer = nullptr; + outerValidityBuffer = nullptr; + } }; /// \defgroup buffer-allocation-functions Functions for allocating buffers diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc index abd5ba6b1a4b..d98315de9a42 100644 --- a/cpp/src/gandiva/annotator.cc +++ b/cpp/src/gandiva/annotator.cc @@ -66,7 +66,8 @@ FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) { child_valid_buffer_ptr_idx = buffer_count_++; } return std::make_shared(field, data_idx, validity_idx, offsets_idx, - data_buffer_ptr_idx, child_offsets_idx, child_valid_buffer_ptr_idx); + data_buffer_ptr_idx, child_offsets_idx, + child_valid_buffer_ptr_idx); } int Annotator::AddHolderPointer(void* holder) { @@ -102,11 +103,11 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf, array_data.child_data.at(0)->offset); - uint8_t* child_valid_buf = reinterpret_cast( - array_data.child_data.at(0)->buffers[0].get()); + uint8_t* child_valid_buf = + reinterpret_cast(array_data.child_data.at(0)->buffers[0].get()); eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, array_data.child_data.at(0)->offset); - + } else { // if list field is input field, just put buffer data into eval batch uint8_t* child_offsets_buf = const_cast( @@ -114,18 +115,18 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf, array_data.child_data.at(0)->offset); - uint8_t* child_valid_buf = const_cast( - array_data.child_data.at(0)->buffers[0]->data()); + uint8_t* child_valid_buf = + 
const_cast(array_data.child_data.at(0)->buffers[0]->data()); eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_valid_buf, array_data.child_data.at(0)->offset); } } if (array_data.type->id() != arrow::Type::LIST || arrow::is_binary_like(array_data.type->field(0)->type()->id())) { - // primitive type list data buffer index is 1 - // binary like type list data buffer index is 2 - ++buffer_idx; - } + // primitive type list data buffer index is 1 + // binary like type list data buffer index is 2 + ++buffer_idx; + } } int const childDataIndex = 0; @@ -133,17 +134,18 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, uint8_t* data_buf = const_cast(array_data.buffers[buffer_idx]->data()); eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset); } else { - uint8_t* data_buf = - const_cast(array_data.child_data.at(childDataIndex)->buffers[buffer_idx]->data()); + uint8_t* data_buf = const_cast( + array_data.child_data.at(childDataIndex)->buffers[buffer_idx]->data()); eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.child_data.at(0)->offset); - + int const childDataBufferIndex = 0; - if (array_data.child_data.at(childDataIndex)->buffers[childDataBufferIndex] ) { - uint8_t* child_valid_buf = const_cast( - array_data.child_data.at(childDataIndex)->buffers[childDataBufferIndex]->data()); - eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, 0); + if (array_data.child_data.at(childDataIndex)->buffers[childDataBufferIndex]) { + uint8_t* child_valid_buf = + const_cast(array_data.child_data.at(childDataIndex) + ->buffers[childDataBufferIndex] + ->data()); + eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, 0); } - } if (is_output) { @@ -161,7 +163,6 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, array_data.child_data.at(0)->offset); } } - } EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch, diff --git a/cpp/src/gandiva/array_ops.cc 
b/cpp/src/gandiva/array_ops.cc index 64548bf09abb..cc6e9ef281a7 100644 --- a/cpp/src/gandiva/array_ops.cc +++ b/cpp/src/gandiva/array_ops.cc @@ -23,31 +23,32 @@ #include "arrow/util/value_parsing.h" -#include "gandiva/gdv_function_stubs.h" #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" +#include "gandiva/gdv_function_stubs.h" /// Stub functions that can be accessed from LLVM or the pre-compiled library. template -Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - Type remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr) -{ +Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, int32_t entry_len, + const int32_t* entry_validity, bool combined_row_validity, + Type remove_data, bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { std::vector newInts; - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); + const int32_t* entry_validityAdjusted = entry_validity - (loop_var); int64_t validityBitIndex = 0; - //The validity index already has the current row length added to it, so decrement. + // The validity index already has the current row length added to it, so decrement. validityBitIndex = validity_index_var - entry_len; std::vector outValid; for (int i = 0; i < entry_len; i++) { Type entry_item = *(entry_buf + i); if (remove_data_valid && entry_item == remove_data) { - //Do not add the item to remove. - } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { + // Do not add the item to remove. 
+ } else if (!arrow::bit_util::GetBit( + reinterpret_cast(entry_validityAdjusted), + validityBitIndex + i)) { outValid.push_back(false); newInts.push_back(0); } else { @@ -58,8 +59,8 @@ Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, *out_len = (int)newInts.size(); - //Since this function can remove values we don't know the length ahead of time. - //A fast way to compute Math.ceil(input / 8.0). + // Since this function can remove values we don't know the length ahead of time. + // A fast way to compute Math.ceil(input / 8.0). int validByteSize = (unsigned int)((*out_len) + 7) >> 3; uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, validByteSize); @@ -68,15 +69,15 @@ Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, } int32_t outBufferLength = (int)*out_len * sizeof(Type); - //length is number of items, but buffers must account for byte size. + // length is number of items, but buffers must account for byte size. uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); memcpy(ret, newInts.data(), outBufferLength); *valid_row = true; - //Return null if the input array is null or the data to remove is null. + // Return null if the input array is null or the data to remove is null. if (!combined_row_validity || !remove_data_valid) { *out_len = 0; - *valid_row = false; //this one is what works for the top level validity. + *valid_row = false; // this one is what works for the top level validity. 
} *valid_ptr = reinterpret_cast(validRet); @@ -84,23 +85,24 @@ Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, } template -bool array_contains_template(const Type* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - Type contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { +bool array_contains_template(const Type* entry_buf, int32_t entry_len, + const int32_t* entry_validity, bool combined_row_validity, + Type contains_data, bool contains_data_valid, + int64_t loop_var, int64_t validity_index_var, + bool* valid_row) { if (!combined_row_validity || !contains_data_valid) { *valid_row = false; return false; } *valid_row = true; - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); + const int32_t* entry_validityAdjusted = entry_validity - (loop_var); int64_t validityBitIndex = validity_index_var - entry_len; - + bool found_null_in_data = false; for (int i = 0; i < entry_len; i++) { - if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { + if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), + validityBitIndex + i)) { found_null_in_data = true; continue; } @@ -109,7 +111,7 @@ bool array_contains_template(const Type* entry_buf, return true; } } - //If there is null in the input and the item is not found the result is null. + // If there is null in the input and the item is not found the result is null. 
if (found_null_in_data) { *valid_row = false; } @@ -119,94 +121,91 @@ bool array_contains_template(const Type* entry_buf, extern "C" { bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } bool array_float32_contains_float32(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float contains_data, bool 
contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, float contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } bool array_float64_contains_float64(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double contains_data, bool contains_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - return array_contains_template(entry_buf, entry_len, entry_validity, - combined_row_validity, contains_data, contains_data_valid, - loop_var, validity_index_var, valid_row); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double contains_data, + bool contains_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row) { + return array_contains_template( + entry_buf, entry_len, entry_validity, combined_row_validity, contains_data, + contains_data_valid, loop_var, validity_index_var, valid_row); } - - int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr) { - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); 
+ int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } int64_t* array_int64_remove(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } float* array_float32_remove(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, 
float remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } - double* array_float64_remove(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double remove_data, bool remove_data_valid, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - return array_remove_template(context_ptr, entry_buf, - entry_len, entry_validity, combined_row_validity, - remove_data, remove_data_valid, - loop_var, validity_index_var, - valid_row, out_len, valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double remove_data, + bool remove_data_valid, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, + int32_t* out_len, int32_t** valid_ptr) { + return array_remove_template(context_ptr, entry_buf, entry_len, entry_validity, + combined_row_validity, remove_data, + remove_data_valid, loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } } @@ -215,143 +214,155 @@ arrow::Status ExportedArrayFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); - args = {types->i64_type(), // int64_t execution_context - types->i64_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i32_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. 
- types->i1_ptr_type() //output validity for the row - }; + args = { + types->i64_type(), // int64_t execution_context + types->i64_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i32_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type() // output validity for the row + }; engine->AddGlobalMappingForFunc("array_int32_contains_int32", types->i1_type() /*return_type*/, args, reinterpret_cast(array_int32_contains_int32)); - args = {types->i64_type(), // int64_t execution_context - types->i64_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i64_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; + args = { + types->i64_type(), // int64_t execution_context + types->i64_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i64_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
+ types->i1_ptr_type() // output validity for the row + }; engine->AddGlobalMappingForFunc("array_int64_contains_int64", types->i1_type() /*return_type*/, args, reinterpret_cast(array_int64_contains_int64)); - args = {types->i64_type(), // int64_t execution_context - types->float_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->float_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; - - engine->AddGlobalMappingForFunc("array_float32_contains_float32", - types->i1_type() /*return_type*/, args, - reinterpret_cast(array_float32_contains_float32)); - - args = {types->i64_type(), // int64_t execution_context - types->double_ptr_type(), // int8_t* data ptr - types->i32_type(), // int32_t data length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->double_type(), // int32_t value to check for - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; - - engine->AddGlobalMappingForFunc("array_float64_contains_float64", - types->i1_type() /*return_type*/, args, - reinterpret_cast(array_float64_contains_float64)); - //Array remove. - args = {types->i64_type(), // int64_t execution_context - types->i32_ptr_type(), // int8_t* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i32_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? 
- types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - engine->AddGlobalMappingForFunc("array_int32_remove", - types->i32_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->float_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->float_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type() // output validity for the row + }; + + engine->AddGlobalMappingForFunc( + "array_float32_contains_float32", types->i1_type() /*return_type*/, args, + reinterpret_cast(array_float32_contains_float32)); + + args = { + types->i64_type(), // int64_t execution_context + types->double_ptr_type(), // int8_t* data ptr + types->i32_type(), // int32_t data length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->double_type(), // int32_t value to check for + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type() // output validity for the row + }; + + engine->AddGlobalMappingForFunc( + "array_float64_contains_float64", types->i1_type() /*return_type*/, args, + reinterpret_cast(array_float64_contains_float64)); + // Array remove. 
+ args = { + types->i64_type(), // int64_t execution_context + types->i32_ptr_type(), // int8_t* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i32_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + engine->AddGlobalMappingForFunc("array_int32_remove", types->i32_ptr_type(), args, reinterpret_cast(array_int32_remove)); - args = {types->i64_type(), // int64_t execution_context - types->i64_ptr_type(), // int8_t* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i64_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - - engine->AddGlobalMappingForFunc("array_int64_remove", - types->i64_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->i64_ptr_type(), // int8_t* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->i64_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? 
+ types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. + types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + + engine->AddGlobalMappingForFunc("array_int64_remove", types->i64_ptr_type(), args, reinterpret_cast(array_int64_remove)); - args = {types->i64_type(), // int64_t execution_context - types->float_ptr_type(), // float* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->float_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - - engine->AddGlobalMappingForFunc("array_float32_remove", - types->float_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->float_ptr_type(), // float* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->float_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
+ types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + + engine->AddGlobalMappingForFunc("array_float32_remove", types->float_ptr_type(), args, reinterpret_cast(array_float32_remove)); - args = {types->i64_type(), // int64_t execution_context - types->double_ptr_type(), // int8_t* input data ptr - types->i32_type(), // int32_t input length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->double_type(), //value to remove from input - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type(), //output validity for the row - types->i32_ptr_type(), // output array length - types->i32_ptr_type() //output pointer to new validity buffer - - }; - - engine->AddGlobalMappingForFunc("array_float64_remove", - types->double_ptr_type(), args, + args = { + types->i64_type(), // int64_t execution_context + types->double_ptr_type(), // int8_t* input data ptr + types->i32_type(), // int32_t input length + types->i32_ptr_type(), // input validity buffer + types->i1_type(), // bool input row validity + types->double_type(), // value to remove from input + types->i1_type(), // bool validity --Needed? + types->i64_type(), // in loop var --Needed? + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
+ types->i1_ptr_type(), // output validity for the row + types->i32_ptr_type(), // output array length + types->i32_ptr_type() // output pointer to new validity buffer + + }; + + engine->AddGlobalMappingForFunc("array_float64_remove", types->double_ptr_type(), args, reinterpret_cast(array_float64_remove)); return arrow::Status::OK(); } diff --git a/cpp/src/gandiva/array_ops.h b/cpp/src/gandiva/array_ops.h index c0de72a39472..9b7d1d93b2be 100644 --- a/cpp/src/gandiva/array_ops.h +++ b/cpp/src/gandiva/array_ops.h @@ -30,57 +30,60 @@ extern "C" { GANDIVA_EXPORT bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); GANDIVA_EXPORT bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); GANDIVA_EXPORT bool array_float32_contains_float32(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, float contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); 
GANDIVA_EXPORT bool array_float64_contains_float64(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double contains_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_buf); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double contains_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_buf); GANDIVA_EXPORT int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int32_t remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr); GANDIVA_EXPORT int64_t* array_int64_remove(int64_t context_ptr, const int64_t* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int64_t remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, int64_t remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr); GANDIVA_EXPORT float* array_float32_remove(int64_t context_ptr, const float* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - float remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, 
float remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, int32_t* out_len, + int32_t** valid_ptr); GANDIVA_EXPORT double* array_float64_remove(int64_t context_ptr, const double* entry_buf, - int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - double remove_data, bool entry_validWhat, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row, int32_t* out_len, int32_t** valid_ptr); - + int32_t entry_len, const int32_t* entry_validity, + bool combined_row_validity, double remove_data, + bool entry_validWhat, int64_t loop_var, + int64_t validity_index_var, bool* valid_row, + int32_t* out_len, int32_t** valid_ptr); } diff --git a/cpp/src/gandiva/array_ops_test.cc b/cpp/src/gandiva/array_ops_test.cc index bf01c1fe0a09..9732482b42ce 100644 --- a/cpp/src/gandiva/array_ops_test.cc +++ b/cpp/src/gandiva/array_ops_test.cc @@ -32,10 +32,9 @@ TEST(TestArrayOps, TestInt32ContainsInt32) { int32_t entry_validity = 15; bool valid = false; - EXPECT_EQ( - array_int32_contains_int32(ctx_ptr, data, entry_offsets_len, &entry_validity, - true, contains_data, true, 0, 3, &valid), - true); + EXPECT_EQ(array_int32_contains_int32(ctx_ptr, data, entry_offsets_len, &entry_validity, + true, contains_data, true, 0, 3, &valid), + true); } } // namespace gandiva diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc index fad1c54ba9f3..70f61c06ebdc 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.cc +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -16,23 +16,21 @@ // under the License. 
#include "gandiva/encrypt_mode_dispatcher.h" -#include "gandiva/encrypt_utils_ecb.h" -#include "gandiva/encrypt_utils_cbc.h" -#include "gandiva/encrypt_utils_gcm.h" -#include "arrow/util/string.h" -#include #include #include +#include #include +#include "arrow/util/string.h" +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_ecb.h" +#include "gandiva/encrypt_utils_gcm.h" namespace gandiva { // Supported encryption modes static const std::vector SUPPORTED_MODES = { - AES_ECB_MODE, AES_ECB_PKCS7_MODE, AES_ECB_NONE_MODE, - AES_CBC_MODE, AES_CBC_PKCS7_MODE, AES_CBC_NONE_MODE, - AES_GCM_MODE -}; + AES_ECB_MODE, AES_ECB_PKCS7_MODE, AES_ECB_NONE_MODE, AES_CBC_MODE, + AES_CBC_PKCS7_MODE, AES_CBC_NONE_MODE, AES_GCM_MODE}; enum class EncryptionMode { ECB, @@ -56,13 +54,13 @@ EncryptionMode ParseEncryptionMode(std::string_view mode_str) { return EncryptionMode::UNKNOWN; } -int32_t EncryptModeDispatcher::encrypt( - const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* mode, int32_t mode_len, const char* iv, - int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* cipher) { - std::string mode_str = - arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); +int32_t EncryptModeDispatcher::encrypt(const char* plaintext, int32_t plaintext_len, + const char* key, int32_t key_len, const char* mode, + int32_t mode_len, const char* iv, int32_t iv_len, + const char* fifth_argument, + int32_t fifth_argument_len, + unsigned char* cipher) { + std::string mode_str = arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: @@ -75,15 +73,15 @@ int32_t EncryptModeDispatcher::encrypt( case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 - return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, - iv, iv_len, true, cipher); + return 
aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, true, + cipher); case EncryptionMode::CBC_NONE: // CBC without padding - return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, - iv, iv_len, false, cipher); + return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, iv, iv_len, false, + cipher); case EncryptionMode::GCM: - return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, - iv, iv_len, fifth_argument, fifth_argument_len, cipher); + return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, iv, iv_len, + fifth_argument, fifth_argument_len, cipher); case EncryptionMode::UNKNOWN: default: { std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); @@ -95,13 +93,13 @@ int32_t EncryptModeDispatcher::encrypt( } } -int32_t EncryptModeDispatcher::decrypt( - const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* mode, int32_t mode_len, const char* iv, - int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* plaintext) { - std::string mode_str = - arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); +int32_t EncryptModeDispatcher::decrypt(const char* ciphertext, int32_t ciphertext_len, + const char* key, int32_t key_len, const char* mode, + int32_t mode_len, const char* iv, int32_t iv_len, + const char* fifth_argument, + int32_t fifth_argument_len, + unsigned char* plaintext) { + std::string mode_str = arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); switch (ParseEncryptionMode(mode_str)) { case EncryptionMode::ECB: @@ -114,15 +112,15 @@ int32_t EncryptModeDispatcher::decrypt( case EncryptionMode::CBC: case EncryptionMode::CBC_PKCS7: // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 - return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, - iv, iv_len, true, plaintext); + return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, true, + plaintext); case 
EncryptionMode::CBC_NONE: // CBC without padding - return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, - iv, iv_len, false, plaintext); + return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, iv, iv_len, false, + plaintext); case EncryptionMode::GCM: - return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, - iv, iv_len, fifth_argument, fifth_argument_len, plaintext); + return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, iv, iv_len, + fifth_argument, fifth_argument_len, plaintext); case EncryptionMode::UNKNOWN: default: { std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); @@ -135,4 +133,3 @@ int32_t EncryptModeDispatcher::decrypt( } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h index 20326845bd02..b4f7c5907a5f 100644 --- a/cpp/src/gandiva/encrypt_mode_dispatcher.h +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -45,12 +45,10 @@ class EncryptModeDispatcher { * @return Length of encrypted data in bytes * @throws std::runtime_error on encryption failure or unsupported mode */ - static int32_t encrypt(const char* plaintext, int32_t plaintext_len, - const char* key, int32_t key_len, - const char* mode, int32_t mode_len, - const char* iv, int32_t iv_len, - const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* cipher); + static int32_t encrypt(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* mode, int32_t mode_len, + const char* iv, int32_t iv_len, const char* fifth_argument, + int32_t fifth_argument_len, unsigned char* cipher); /** * Decrypt data using the specified mode @@ -69,15 +67,12 @@ class EncryptModeDispatcher { * @return Length of decrypted data in bytes * @throws std::runtime_error on decryption failure or unsupported mode */ - static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, - const char* key, int32_t key_len, - const char* 
mode, int32_t mode_len, - const char* iv, int32_t iv_len, - const char* fifth_argument, int32_t fifth_argument_len, - unsigned char* plaintext); + static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* mode, int32_t mode_len, + const char* iv, int32_t iv_len, const char* fifth_argument, + int32_t fifth_argument_len, unsigned char* plaintext); }; } // namespace gandiva #endif // GANDIVA_ENCRYPT_MODE_DISPATCHER_H - diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc index 04eb60c96a77..e8f997e9e397 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -16,13 +16,13 @@ // under the License. #include "gandiva/encrypt_utils_cbc.h" -#include "gandiva/encrypt_utils_common.h" #include #include -#include +#include #include #include -#include +#include +#include "gandiva/encrypt_utils_common.h" namespace gandiva { @@ -49,8 +49,8 @@ const EVP_CIPHER* get_cbc_cipher_algo(int32_t key_length) { GANDIVA_EXPORT int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* cipher) { + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* cipher) { // Validate IV length if (iv_len != 16) { std::ostringstream oss; @@ -108,8 +108,8 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char GANDIVA_EXPORT int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* plaintext) { + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* plaintext) { // Validate IV length if (iv_len != 16) { std::ostringstream oss; @@ -166,4 +166,3 @@ int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const ch } } // namespace gandiva - diff 
--git a/cpp/src/gandiva/encrypt_utils_cbc.h b/cpp/src/gandiva/encrypt_utils_cbc.h index b083d6f0a2de..9ac26eafbfa0 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc.h +++ b/cpp/src/gandiva/encrypt_utils_cbc.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include "gandiva/visibility.h" namespace gandiva { @@ -44,8 +44,8 @@ constexpr const char* AES_CBC_NONE_MODE = "AES-CBC-NONE"; */ GANDIVA_EXPORT int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* cipher); + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* cipher); /** * Decrypt data using AES-CBC algorithm with explicit padding mode @@ -63,8 +63,7 @@ int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char */ GANDIVA_EXPORT int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - bool use_padding, unsigned char* plaintext); + int32_t key_len, const char* iv, int32_t iv_len, bool use_padding, + unsigned char* plaintext); } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_cbc_test.cc b/cpp/src/gandiva/encrypt_utils_cbc_test.cc index 8bf9227d65b4..6891ec0f4680 100644 --- a/cpp/src/gandiva/encrypt_utils_cbc_test.cc +++ b/cpp/src/gandiva/encrypt_utils_cbc_test.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_cbc.h" -#include #include +#include #include // Test PKCS#7 padding with 16-byte key @@ -36,9 +36,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { iv, iv_len, true, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - true, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, 
to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -59,9 +59,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { iv, iv_len, true, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - true, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -82,9 +82,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { iv, iv_len, true, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - true, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, true, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -105,9 +105,9 @@ TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { iv, iv_len, false, cipher); unsigned char decrypted[64]; - int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - false, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, false, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -125,8 +125,8 @@ TEST(TestAesCbcEncryptUtils, TestInvalidIVLength) { unsigned char cipher[64]; try { - gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, - iv, iv_len, true, cipher); + gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, + cipher); FAIL() << "Expected std::runtime_error"; } catch (const 
std::runtime_error& e) { EXPECT_THAT(e.what(), testing::HasSubstr("Invalid IV length for AES-CBC")); @@ -145,13 +145,10 @@ TEST(TestAesCbcEncryptUtils, TestInvalidKeyLength) { unsigned char cipher[64]; try { - gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, - iv, iv_len, true, cipher); + gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, iv, iv_len, true, + cipher); FAIL() << "Expected std::runtime_error"; } catch (const std::runtime_error& e) { EXPECT_THAT(e.what(), testing::HasSubstr("Unsupported key length for AES-CBC")); } } - - - diff --git a/cpp/src/gandiva/encrypt_utils_common.cc b/cpp/src/gandiva/encrypt_utils_common.cc index 3213e0c6e1a1..6eab3e84ac62 100644 --- a/cpp/src/gandiva/encrypt_utils_common.cc +++ b/cpp/src/gandiva/encrypt_utils_common.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_common.h" #include -#include #include +#include namespace gandiva { @@ -43,4 +43,3 @@ std::string get_openssl_error_string() { } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_common.h b/cpp/src/gandiva/encrypt_utils_common.h index 62dc14db348e..747f13963a2e 100644 --- a/cpp/src/gandiva/encrypt_utils_common.h +++ b/cpp/src/gandiva/encrypt_utils_common.h @@ -24,12 +24,13 @@ namespace gandiva { /// @brief Get a human-readable error string from OpenSSL's error queue. /// @details Retrieves all errors from the OpenSSL error queue and concatenates them -/// with "; " as a separator. This ensures complete error information is captured. -/// @return A string describing all OpenSSL errors in the queue, or "Unknown OpenSSL error" +/// with "; " as a separator. This ensures complete error information is +/// captured. +/// @return A string describing all OpenSSL errors in the queue, or "Unknown OpenSSL +/// error" /// if no error is available. 
std::string get_openssl_error_string(); } // namespace gandiva #endif // GANDIVA_ENCRYPT_UTILS_COMMON_H - diff --git a/cpp/src/gandiva/encrypt_utils_common_test.cc b/cpp/src/gandiva/encrypt_utils_common_test.cc index de55758d5377..5161d4afeecb 100644 --- a/cpp/src/gandiva/encrypt_utils_common_test.cc +++ b/cpp/src/gandiva/encrypt_utils_common_test.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_common.h" -#include #include +#include #include #include @@ -92,4 +92,3 @@ TEST(TestOpenSSLErrorUtils, TestErrorQueueDrained) { EXPECT_EQ(second_call, "Unknown OpenSSL error"); } - diff --git a/cpp/src/gandiva/encrypt_utils_ecb.cc b/cpp/src/gandiva/encrypt_utils_ecb.cc index b4913e1c8802..662a3a0986ef 100644 --- a/cpp/src/gandiva/encrypt_utils_ecb.cc +++ b/cpp/src/gandiva/encrypt_utils_ecb.cc @@ -16,12 +16,12 @@ // under the License. #include "gandiva/encrypt_utils_ecb.h" -#include "gandiva/encrypt_utils_common.h" #include #include -#include #include #include +#include +#include "gandiva/encrypt_utils_common.h" namespace gandiva { @@ -145,4 +145,3 @@ int32_t aes_decrypt_ecb(const char* ciphertext, int32_t ciphertext_len, const ch } } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_ecb.h b/cpp/src/gandiva/encrypt_utils_ecb.h index ba62bf3bea9a..af4a7a7c85bd 100644 --- a/cpp/src/gandiva/encrypt_utils_ecb.h +++ b/cpp/src/gandiva/encrypt_utils_ecb.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include "gandiva/visibility.h" namespace gandiva { @@ -67,4 +67,3 @@ int32_t aes_decrypt_ecb(const char* ciphertext, int32_t ciphertext_len, const ch int32_t key_len, bool use_padding, unsigned char* plaintext); } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_ecb_test.cc b/cpp/src/gandiva/encrypt_utils_ecb_test.cc index 52687a8a4a9d..1245b397c6dc 100644 --- a/cpp/src/gandiva/encrypt_utils_ecb_test.cc +++ b/cpp/src/gandiva/encrypt_utils_ecb_test.cc @@ -30,11 +30,13 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { 
static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_1[64]; - int32_t cipher_1_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_1); + int32_t cipher_1_len = + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_1); unsigned char decrypted_1[64]; - int32_t decrypted_1_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_1), - cipher_1_len, key, key_len, true, decrypted_1); + int32_t decrypted_1_len = + gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_1), cipher_1_len, key, + key_len, true, decrypted_1); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_1), decrypted_1_len)); @@ -48,11 +50,13 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_2[64]; - int32_t cipher_2_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_2); + int32_t cipher_2_len = + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_2); unsigned char decrypted_2[64]; - int32_t decrypted_2_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_2), - cipher_2_len, key, key_len, true, decrypted_2); + int32_t decrypted_2_len = + gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_2), cipher_2_len, key, + key_len, true, decrypted_2); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_2), decrypted_2_len)); @@ -66,18 +70,21 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_3[64]; - int32_t cipher_3_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_3); + int32_t cipher_3_len = + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_3); unsigned char decrypted_3[64]; - int32_t decrypted_3_len = 
gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_3), - cipher_3_len, key, key_len, true, decrypted_3); + int32_t decrypted_3_len = + gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_3), cipher_3_len, key, + key_len, true, decrypted_3); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_3), decrypted_3_len)); // check exception char cipher[64] = "JBB7oJAQuqhDCx01fvBRi8PcljW1+nbnOSMk+R0Sz7E=="; - int32_t cipher_len = static_cast(strlen(reinterpret_cast(cipher))); + int32_t cipher_len = + static_cast(strlen(reinterpret_cast(cipher))); unsigned char plain_text[64]; key = "12345678abcdefgh12345678abcdefgh12345678abcdefgh12345678abcdefgh"; @@ -87,13 +94,16 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { to_encrypt_len = static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_4[64]; - ASSERT_THROW({ - gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_4); - }, std::runtime_error); + ASSERT_THROW( + { + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, + cipher_4); + }, + std::runtime_error); - ASSERT_THROW({ - gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); - }, std::runtime_error); + ASSERT_THROW( + { gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); }, + std::runtime_error); key = "12345678"; to_encrypt = "New\ntest\nstring"; @@ -102,11 +112,13 @@ TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { to_encrypt_len = static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_5[64]; - ASSERT_THROW({ - gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_5); - }, std::runtime_error); - ASSERT_THROW({ - gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); - }, std::runtime_error); + ASSERT_THROW( + { + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, + cipher_5); + }, + std::runtime_error); + 
ASSERT_THROW( + { gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); }, + std::runtime_error); } - diff --git a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc index f028243da590..fa8600553c3d 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm.cc @@ -16,12 +16,12 @@ // under the License. #include "gandiva/encrypt_utils_gcm.h" -#include "gandiva/encrypt_utils_common.h" #include #include -#include #include #include +#include +#include "gandiva/encrypt_utils_common.h" namespace gandiva { @@ -47,10 +47,9 @@ const EVP_CIPHER* get_gcm_cipher_algo(int32_t key_length) { } // namespace GANDIVA_EXPORT -int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, - const char* key, int32_t key_len, const char* iv, - int32_t iv_len, const char* aad, int32_t aad_len, - unsigned char* cipher) { +int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* cipher) { if (iv_len <= 0) { throw std::runtime_error( "Invalid IV length for AES-GCM: IV length must be greater than 0"); @@ -125,10 +124,9 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, } GANDIVA_EXPORT -int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, - const char* key, int32_t key_len, const char* iv, - int32_t iv_len, const char* aad, int32_t aad_len, - unsigned char* plaintext) { +int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* plaintext) { if (iv_len <= 0) { throw std::runtime_error( "Invalid IV length for AES-GCM: IV length must be greater than 0"); @@ -211,4 +209,3 @@ int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, } } // namespace gandiva - diff --git 
a/cpp/src/gandiva/encrypt_utils_gcm.h b/cpp/src/gandiva/encrypt_utils_gcm.h index 07a597af0b6c..3c315928003c 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm.h +++ b/cpp/src/gandiva/encrypt_utils_gcm.h @@ -17,8 +17,8 @@ #pragma once -#include #include +#include #include "gandiva/visibility.h" namespace gandiva { @@ -40,14 +40,15 @@ constexpr int32_t GCM_TAG_LENGTH = 16; * @param iv_len Length of IV in bytes * @param aad Optional additional authenticated data (can be null) * @param aad_len Length of AAD in bytes (0 if aad is null) - * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 16 bytes) + * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 16 + * bytes) * @return Length of encrypted data in bytes (plaintext_len + 16 for the tag) * @throws std::runtime_error on encryption failure or invalid parameters */ GANDIVA_EXPORT int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - const char* aad, int32_t aad_len, unsigned char* cipher); + int32_t key_len, const char* iv, int32_t iv_len, const char* aad, + int32_t aad_len, unsigned char* cipher); /** * Decrypt data using AES-GCM algorithm @@ -62,12 +63,12 @@ int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char * @param aad_len Length of AAD in bytes (0 if aad is null) * @param plaintext Output buffer for decrypted data * @return Length of decrypted data in bytes (ciphertext_len - 16) - * @throws std::runtime_error on decryption failure, invalid parameters, or tag verification failure + * @throws std::runtime_error on decryption failure, invalid parameters, or tag + * verification failure */ GANDIVA_EXPORT int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, const char* iv, int32_t iv_len, - const char* aad, int32_t aad_len, unsigned char* plaintext); + int32_t key_len, const char* iv, int32_t 
iv_len, const char* aad, + int32_t aad_len, unsigned char* plaintext); } // namespace gandiva - diff --git a/cpp/src/gandiva/encrypt_utils_gcm_test.cc b/cpp/src/gandiva/encrypt_utils_gcm_test.cc index 2156132bc628..05e472f31446 100644 --- a/cpp/src/gandiva/encrypt_utils_gcm_test.cc +++ b/cpp/src/gandiva/encrypt_utils_gcm_test.cc @@ -17,8 +17,8 @@ #include "gandiva/encrypt_utils_gcm.h" -#include #include +#include #include // Test IV-only GCM with 16-byte key @@ -39,9 +39,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { EXPECT_EQ(cipher_len, to_encrypt_len + 16); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -66,9 +66,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { EXPECT_EQ(cipher_len, to_encrypt_len + 16); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - aad, aad_len, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, aad, aad_len, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -89,9 +89,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { iv, iv_len, nullptr, 0, cipher); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, nullptr, 0, decrypted); 
EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -112,9 +112,9 @@ TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { iv, iv_len, nullptr, 0, cipher); unsigned char decrypted[128]; - int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted); + int32_t decrypted_len = + gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, key, + key_len, iv, iv_len, nullptr, 0, decrypted); EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), std::string(reinterpret_cast(decrypted), decrypted_len)); @@ -138,9 +138,8 @@ TEST(TestAesGcmEncryptUtils, TestTagVerificationFailure) { cipher[cipher_len - 1] ^= 0xFF; unsigned char decrypted[128]; - EXPECT_THROW(gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), - cipher_len, key, key_len, iv, iv_len, - nullptr, 0, decrypted), + EXPECT_THROW(gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), cipher_len, + key, key_len, iv, iv_len, nullptr, 0, decrypted), std::runtime_error); } @@ -155,8 +154,7 @@ TEST(TestAesGcmEncryptUtils, TestInvalidIvLength) { auto to_encrypt_len = static_cast(strlen(to_encrypt)); unsigned char cipher[128]; - EXPECT_THROW(gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, - iv, iv_len, nullptr, 0, cipher), + EXPECT_THROW(gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, iv, + iv_len, nullptr, 0, cipher), std::runtime_error); } - diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 42316dce8d00..c7ef2a7f4f70 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -198,7 +198,7 @@ Status Engine::Make(const std::shared_ptr& conf, bool cached, #else using CodeGenOptLevel = llvm::CodeGenOpt::Level; #endif - auto const opt_level = + auto const opt_level = conf->optimize() ? 
CodeGenOptLevel::Aggressive : CodeGenOptLevel::None; // Note that the lifetime of the error string is not captured by the diff --git a/cpp/src/gandiva/field_descriptor.h b/cpp/src/gandiva/field_descriptor.h index dfcf6872d501..7eb6e5822efd 100644 --- a/cpp/src/gandiva/field_descriptor.h +++ b/cpp/src/gandiva/field_descriptor.h @@ -31,15 +31,15 @@ class FieldDescriptor { FieldDescriptor(FieldPtr field, int data_idx, int validity_idx = kInvalidIdx, int offsets_idx = kInvalidIdx, int data_buffer_ptr_idx = kInvalidIdx, - int child_offsets_idx = kInvalidIdx, int child_validity_idx = kInvalidIdx) + int child_offsets_idx = kInvalidIdx, + int child_validity_idx = kInvalidIdx) : field_(field), data_idx_(data_idx), validity_idx_(validity_idx), offsets_idx_(offsets_idx), data_buffer_ptr_idx_(data_buffer_ptr_idx), child_offsets_idx_(child_offsets_idx), - child_validity_idx_(child_validity_idx) { - } + child_validity_idx_(child_validity_idx) {} /// Index of validity array in the array-of-buffers int validity_idx() const { return validity_idx_; } @@ -56,9 +56,7 @@ class FieldDescriptor { /// Index of list type child data offsets int child_data_offsets_idx() const { return child_offsets_idx_; } int child_data_validity_idx() const { return child_validity_idx_; } - void set_child_data_validity_idx(int val) { - child_validity_idx_ = val; - } + void set_child_data_validity_idx(int val) { child_validity_idx_ = val; } FieldPtr field() const { return field_; } const std::string& Name() const { return field_->name(); } diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index a9051a244c73..f1aefb921d14 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -17,7 +17,6 @@ #include "gandiva/function_registry.h" - #include #include #include @@ -146,7 +145,8 @@ arrow::Result> MakeDefaultFunctionRegistry() { for (auto const& funcs : {GetArithmeticFunctionRegistry(), GetDateTimeFunctionRegistry(), 
GetHashFunctionRegistry(), GetMathOpsFunctionRegistry(), - GetStringFunctionRegistry(), GetDateTimeArithmeticFunctionRegistry(), GetArrayFunctionRegistry()}) { + GetStringFunctionRegistry(), GetDateTimeArithmeticFunctionRegistry(), + GetArrayFunctionRegistry()}) { for (auto const& func_signature : funcs) { ARROW_RETURN_NOT_OK(registry->Add(func_signature)); } diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 7750421360e3..bb910843632c 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -514,21 +514,23 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv (e.g. CBC mode) - NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_4args", + NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_encrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_4args", + NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_decrypt_dispatcher_4args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), // Parameters: data, key, mode, iv, fifth_argument (e.g. 
GCM mode) - NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_encrypt_dispatcher_5args", + NativeFunction("encrypt", {}, + DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_encrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), - kResultNullIfNull, "gdv_fn_decrypt_dispatcher_5args", + NativeFunction("decrypt", {}, + DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, + binary(), kResultNullIfNull, "gdv_fn_decrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("mask_first_n", {}, DataTypeVector{utf8(), int32()}, utf8(), diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index a33483e8a002..166f8874a806 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -30,9 +30,9 @@ #include "arrow/util/double_conversion.h" #include "arrow/util/value_parsing.h" -#include "gandiva/encrypt_utils_ecb.h" -#include "gandiva/encrypt_utils_cbc.h" #include "gandiva/encrypt_mode_dispatcher.h" +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_ecb.h" #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" #include "gandiva/in_holder.h" @@ -164,29 +164,31 @@ int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr, } /// Stub functions that can be accessed from LLVM or the pre-compiled library. 
-#define POPULATE_NUMERIC_LIST_TYPE_VECTOR(TYPE, SCALE) \ - int32_t gdv_fn_populate_list_##TYPE##_vector(int64_t context_ptr, int8_t* data_ptr, \ - int32_t* offsets, int64_t slot, \ - TYPE* entry_buf, int32_t entry_len, int32_t** valid_ptr) { \ - auto buffer = reinterpret_cast(data_ptr); \ - int32_t offset = static_cast(buffer->size()); \ - auto status = buffer->Resize(offset + entry_len * SCALE, false /*shrink*/); \ - if (!status.ok()) { \ - gandiva::ExecutionContext* context = \ - reinterpret_cast(context_ptr); \ - context->set_error_msg(status.message().c_str()); \ - return -1; \ - } \ - memcpy(buffer->mutable_data() + offset, (char*)entry_buf, entry_len * SCALE); \ - int validbitIndex = offset / SCALE; \ - for (int i = 0; i < entry_len; i++) { \ - arrow::bit_util::SetBitTo(buffer->validityBuffer, validbitIndex + i, arrow::bit_util::GetBit(reinterpret_cast(valid_ptr), i)); \ - } \ - offsets = reinterpret_cast(buffer->offsetBuffer); \ - offsets[slot] = offset / SCALE; \ - offsets[slot + 1] = offset / SCALE + entry_len; \ +#define POPULATE_NUMERIC_LIST_TYPE_VECTOR(TYPE, SCALE) \ + int32_t gdv_fn_populate_list_##TYPE##_vector( \ + int64_t context_ptr, int8_t* data_ptr, int32_t* offsets, int64_t slot, \ + TYPE* entry_buf, int32_t entry_len, int32_t** valid_ptr) { \ + auto buffer = reinterpret_cast(data_ptr); \ + int32_t offset = static_cast(buffer->size()); \ + auto status = buffer->Resize(offset + entry_len * SCALE, false /*shrink*/); \ + if (!status.ok()) { \ + gandiva::ExecutionContext* context = \ + reinterpret_cast(context_ptr); \ + context->set_error_msg(status.message().c_str()); \ + return -1; \ + } \ + memcpy(buffer->mutable_data() + offset, (char*)entry_buf, entry_len * SCALE); \ + int validbitIndex = offset / SCALE; \ + for (int i = 0; i < entry_len; i++) { \ + arrow::bit_util::SetBitTo( \ + buffer->validityBuffer, validbitIndex + i, \ + arrow::bit_util::GetBit(reinterpret_cast(valid_ptr), i)); \ + } \ + offsets = reinterpret_cast(buffer->offsetBuffer); \ 
+ offsets[slot] = offset / SCALE; \ + offsets[slot + 1] = offset / SCALE + entry_len; \ return 0; \ - }\ + } POPULATE_NUMERIC_LIST_TYPE_VECTOR(int32_t, 4) POPULATE_NUMERIC_LIST_TYPE_VECTOR(int64_t, 8) @@ -398,8 +400,6 @@ CAST_NUMERIC_FROM_VARBINARY(double, arrow::DoubleType, FLOAT8) #undef GDV_FN_CAST_VARCHAR_INTEGER #undef GDV_FN_CAST_VARCHAR_REAL - - GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, int32_t data_len, int32_t n_to_mask, @@ -858,12 +858,9 @@ namespace gandiva { // This is called by the LLVM engine with string calling convention // WARNING: This function is for backward compatibility only. Encrypted binary // data is not guaranteed to be valid UTF-8. Use binary signatures for new code. -extern "C" GANDIVA_EXPORT -const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len) { +extern "C" GANDIVA_EXPORT const char* gdv_fn_aes_encrypt_ecb_legacy( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len) { // Delegate to the core implementation with ECB mode // This function is ECB-only, so we enforce the mode const char* mode = "AES-ECB"; @@ -885,12 +882,9 @@ const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, // This is called by the LLVM engine with string calling convention // WARNING: This function is for backward compatibility only. Decrypted data // may not be valid UTF-8. Use binary signatures for new code. 
-extern "C" GANDIVA_EXPORT -const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len) { +extern "C" GANDIVA_EXPORT const char* gdv_fn_aes_decrypt_ecb_legacy( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len) { // Delegate to the core implementation with ECB mode // This function is ECB-only, so we enforce the mode const char* mode = "AES-ECB"; @@ -909,52 +903,43 @@ const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, } // The 3- and 4-arg signatures exist to support optional IV and other arguments -extern "C" GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_3args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - int32_t* out_len) { - return gdv_fn_encrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, int32_t* out_len) { + return gdv_fn_encrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, mode_len, nullptr, 0, nullptr, 0, out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_3args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_3args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - int32_t* out_len) { - return gdv_fn_decrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, - 0, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, int32_t* out_len) { + return gdv_fn_decrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, 
mode_len, nullptr, 0, nullptr, 0, out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_4args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, int32_t* out_len) { - return gdv_fn_encrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len) { + return gdv_fn_encrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, mode_len, iv_data, iv_data_len, nullptr, 0, + out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_4args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_4args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, int32_t* out_len) { - return gdv_fn_decrypt_dispatcher_5args( - context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, nullptr, 0, out_len); + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len) { + return gdv_fn_decrypt_dispatcher_5args(context, data, data_len, key_data, key_data_len, + mode, mode_len, iv_data, iv_data_len, nullptr, 0, + out_len); } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_5args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_encrypt_dispatcher_5args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, int32_t* 
out_len) { + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, + int32_t* out_len) { try { // Allocate extra 16 bytes for AES block padding (PKCS7 padding can add // up to 16 bytes for a 128-bit block cipher) @@ -962,13 +947,12 @@ const char* gdv_fn_encrypt_dispatcher_5args( auto* output = reinterpret_cast( gdv_fn_context_arena_malloc(context, data_len + 16)); if (output == nullptr) { - throw std::runtime_error( - "Memory allocation failed for encryption output"); + throw std::runtime_error("Memory allocation failed for encryption output"); } int32_t cipher_len = EncryptModeDispatcher::encrypt( - data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, fifth_argument, fifth_argument_len, output); + data, data_len, key_data, key_data_len, mode, mode_len, iv_data, iv_data_len, + fifth_argument, fifth_argument_len, output); *out_len = cipher_len; return reinterpret_cast(output); @@ -979,23 +963,21 @@ const char* gdv_fn_encrypt_dispatcher_5args( } } -extern "C" GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_5args( +extern "C" GANDIVA_EXPORT const char* gdv_fn_decrypt_dispatcher_5args( int64_t context, const char* data, int32_t data_len, const char* key_data, - int32_t key_data_len, const char* mode, int32_t mode_len, - const char* iv_data, int32_t iv_data_len, const char* fifth_argument, - int32_t fifth_argument_len, int32_t* out_len) { + int32_t key_data_len, const char* mode, int32_t mode_len, const char* iv_data, + int32_t iv_data_len, const char* fifth_argument, int32_t fifth_argument_len, + int32_t* out_len) { try { - auto* output = reinterpret_cast( - gdv_fn_context_arena_malloc(context, data_len)); + auto* output = + reinterpret_cast(gdv_fn_context_arena_malloc(context, data_len)); if (output == nullptr) { - throw std::runtime_error( - "Memory allocation failed for decryption output"); + throw std::runtime_error("Memory 
allocation failed for decryption output"); } int32_t plaintext_len = EncryptModeDispatcher::decrypt( - data, data_len, key_data, key_data_len, mode, mode_len, iv_data, - iv_data_len, fifth_argument, fifth_argument_len, output); + data, data_len, key_data, key_data_len, mode, mode_len, iv_data, iv_data_len, + fifth_argument, fifth_argument_len, output); *out_len = plaintext_len; return reinterpret_cast(output); @@ -1254,8 +1236,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_encrypt_dispatcher_3args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_encrypt_dispatcher_3args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_encrypt_dispatcher_3args)); // gdv_fn_decrypt_dispatcher_3args (data, key, mode) @@ -1271,8 +1252,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_decrypt_dispatcher_3args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_decrypt_dispatcher_3args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_decrypt_dispatcher_3args)); // gdv_fn_encrypt_dispatcher_4args (data, key, mode, iv) @@ -1290,8 +1270,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_encrypt_dispatcher_4args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_encrypt_dispatcher_4args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_encrypt_dispatcher_4args)); // gdv_fn_decrypt_dispatcher_4args (data, key, mode, iv) @@ -1309,8 +1288,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_decrypt_dispatcher_4args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_decrypt_dispatcher_4args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_decrypt_dispatcher_4args)); // 
gdv_fn_encrypt_dispatcher_5args (data, key, mode, iv, @@ -1331,8 +1309,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_encrypt_dispatcher_5args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_encrypt_dispatcher_5args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_encrypt_dispatcher_5args)); // gdv_fn_decrypt_dispatcher_5args (data, key, mode, iv, @@ -1353,8 +1330,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_decrypt_dispatcher_5args", - types->i8_ptr_type() /*return_type*/, args, + "gdv_fn_decrypt_dispatcher_5args", types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_decrypt_dispatcher_5args)); // gdv_mask_first_n and gdv_mask_last_n @@ -1454,8 +1430,7 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_cast_intervalday_utf8_int32", - types->i64_type() /*return_type*/, args, + "gdv_fn_cast_intervalday_utf8_int32", types->i64_type() /*return_type*/, args, reinterpret_cast(gdv_fn_cast_intervalday_utf8_int32)); // gdv_fn_cast_intervalyear_utf8 @@ -1472,15 +1447,13 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type() /*return_type*/, args, reinterpret_cast(gdv_fn_cast_intervalyear_utf8)); -#define ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION( \ - LLVM_TYPE, DATA_TYPE) \ - args = {types->i64_type(), types->i8_ptr_type(), types->i32_ptr_type(), \ - types->i64_type(), types->LLVM_TYPE##_ptr_type(), \ - types->i32_type(), types->i32_ptr_type()}; \ - engine->AddGlobalMappingForFunc( \ - "gdv_fn_populate_list_" #DATA_TYPE "_vector", \ - types->i32_type() /*return_type*/, args, \ - reinterpret_cast(gdv_fn_populate_list_##DATA_TYPE##_vector)); +#define ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(LLVM_TYPE, DATA_TYPE) \ + args = {types->i64_type(), 
types->i8_ptr_type(), types->i32_ptr_type(), \ + types->i64_type(), types->LLVM_TYPE##_ptr_type(), types->i32_type(), \ + types->i32_ptr_type()}; \ + engine->AddGlobalMappingForFunc( \ + "gdv_fn_populate_list_" #DATA_TYPE "_vector", types->i32_type() /*return_type*/, \ + args, reinterpret_cast(gdv_fn_populate_list_##DATA_TYPE##_vector)); ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(i32, int32_t) ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(i64, int64_t) @@ -1504,15 +1477,16 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { reinterpret_cast(gdv_fn_cast_intervalyear_utf8_int32)); // gdv_fn_populate_list_varlen_vector - args = {types->i64_type(), // int64_t execution_context - types->i8_ptr_type(), // int8_t* data ptr - types->i32_ptr_type(), // int32_t* offsets ptr - types->i32_ptr_type(), // int32_t* child offsets ptr - types->i64_type(), // int64_t slot - types->i8_ptr_type(), // const char* entry_buf - types->i32_ptr_type(), // int32_t* entry child offsets ptr - types->i32_type(), // int32_t entry child offsets length - types->i32_ptr_type() // int32_t* entry child valid ptr + args = { + types->i64_type(), // int64_t execution_context + types->i8_ptr_type(), // int8_t* data ptr + types->i32_ptr_type(), // int32_t* offsets ptr + types->i32_ptr_type(), // int32_t* child offsets ptr + types->i64_type(), // int64_t slot + types->i8_ptr_type(), // const char* entry_buf + types->i32_ptr_type(), // int32_t* entry child offsets ptr + types->i32_type(), // int32_t entry child offsets length + types->i32_ptr_type() // int32_t* entry child valid ptr }; engine->AddGlobalMappingForFunc( diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 54480ac7f6f4..b77b2240abe9 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -192,62 +192,60 @@ double gdv_fn_castFLOAT8_varbinary(gdv_int64 context, const char* in, int32_t in // Legacy wrappers for string-based 
AES-ECB signatures GANDIVA_EXPORT const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len); + int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len); GANDIVA_EXPORT const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, - int32_t data_len, - const char* key_data, - int32_t key_data_len, - int32_t* out_len); + int32_t data_len, const char* key_data, + int32_t key_data_len, int32_t* out_len); // 3-argument dispatcher: (data, key, mode) GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, int32_t* out_len); +const char* gdv_fn_encrypt_dispatcher_3args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, int32_t* out_len); GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_3args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, int32_t* out_len); +const char* gdv_fn_decrypt_dispatcher_3args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, int32_t* out_len); // 4-argument dispatcher: (data, key, mode, iv) GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - int32_t* out_len); +const char* gdv_fn_encrypt_dispatcher_4args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len); 
GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_4args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - int32_t* out_len); +const char* gdv_fn_decrypt_dispatcher_4args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, int32_t* out_len); // 5-argument dispatcher: (data, key, mode, iv, fifth_argument) GANDIVA_EXPORT -const char* gdv_fn_encrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - const char* fifth_argument, int32_t fifth_argument_len, - int32_t* out_len); - -GANDIVA_EXPORT -const char* gdv_fn_decrypt_dispatcher_5args( - int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, const char* mode, - int32_t mode_len, const char* iv_data, int32_t iv_data_len, - const char* fifth_argument, int32_t fifth_argument_len, - int32_t* out_len); +const char* gdv_fn_encrypt_dispatcher_5args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, + const char* fifth_argument, + int32_t fifth_argument_len, int32_t* out_len); + +GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_5args(int64_t context, const char* data, + int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, + int32_t iv_data_len, + const char* fifth_argument, + int32_t fifth_argument_len, int32_t* out_len); GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc 
b/cpp/src/gandiva/gdv_function_stubs_test.cc index bfb34eeb31d8..171c60eef256 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -22,10 +22,10 @@ #include #include "arrow/util/logging.h" -#include "gandiva/execution_context.h" -#include "gandiva/encrypt_utils_ecb.h" #include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_ecb.h" #include "gandiva/encrypt_utils_gcm.h" +#include "gandiva/execution_context.h" namespace gandiva { @@ -1360,16 +1360,15 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt16) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { @@ -1384,17 +1383,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key24.c_str(), + key24_len, mode.c_str(), mode_len, &cipher_len); - const char* 
decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key24.c_str(), + key24_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { @@ -1409,17 +1407,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key32.c_str(), + key32_len, mode.c_str(), mode_len, &cipher_len); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key32.c_str(), + key32_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { @@ -1435,16 +1432,14 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { std::string cipher = "12345678abcdefgh12345678abcdefghb"; auto cipher_len = static_cast(cipher.length()); - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, - key33.c_str(), key33_len, mode.c_str(), - mode_len, &cipher_len); + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key33.c_str(), + key33_len, mode.c_str(), mode_len, &cipher_len); 
EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, - key33.c_str(), key33_len, mode.c_str(), - mode_len, &decrypted_len); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, key33.c_str(), + key33_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); @@ -1463,17 +1458,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeEcb) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); EXPECT_GT(cipher_len, 0); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { @@ -1489,23 +1483,19 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { int64_t ctx_ptr = reinterpret_cast(&ctx); // Test encrypt with invalid mode - gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, - key16.c_str(), key16_len, - invalid_mode.c_str(), invalid_mode_len, - &cipher_len); - EXPECT_THAT(ctx.get_error(), - ::testing::HasSubstr("Unsupported encryption mode")); + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, 
key16.c_str(), + key16_len, invalid_mode.c_str(), invalid_mode_len, + &cipher_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported encryption mode")); ctx.Reset(); // Test decrypt with invalid mode std::string cipher = "12345678abcdefgh12345678abcdefgh"; auto cipher_len_val = static_cast(cipher.length()); - gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, - key16.c_str(), key16_len, - invalid_mode.c_str(), invalid_mode_len, - &decrypted_len); - EXPECT_THAT(ctx.get_error(), - ::testing::HasSubstr("Unsupported decryption mode")); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, key16.c_str(), + key16_len, invalid_mode.c_str(), invalid_mode_len, + &decrypted_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Unsupported decryption mode")); ctx.Reset(); } @@ -1525,17 +1515,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, nullptr, 0, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, nullptr, 0, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, nullptr, 0, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { @@ -1555,17 +1544,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = 
gdv_fn_encrypt_dispatcher_5args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for shorthand mode: AES-ECB (defaults to PKCS7) @@ -1581,18 +1569,17 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandEcb) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); EXPECT_GT(cipher_len, 0); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for explicit mode: AES-ECB-PKCS7 @@ -1608,18 +1595,17 @@ TEST(TestGdvFnStubs, 
TestAesEncryptDecryptExplicitEcbPkcs7) { auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_encrypt_dispatcher_3args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &cipher_len); + const char* cipher = + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &cipher_len); EXPECT_GT(cipher_len, 0); - const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, &decrypted_len); + const char* decrypted_value = + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher, cipher_len, key16.c_str(), + key16_len, mode.c_str(), mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for shorthand mode: AES-CBC (defaults to PKCS7) @@ -1638,17 +1624,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for explicit mode: AES-CBC-PKCS7 @@ -1667,17 +1652,16 @@ TEST(TestGdvFnStubs, 
TestAesEncryptDecryptExplicitCbcPkcs7) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } // Tests for explicit mode: AES-CBC-NONE (no padding) @@ -1697,17 +1681,16 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { int64_t ctx_ptr = reinterpret_cast(&ctx); const char* cipher = gdv_fn_encrypt_dispatcher_4args( - ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &cipher_len); + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &cipher_len); EXPECT_GT(cipher_len, 0); const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), - mode_len, iv.c_str(), iv_len, &decrypted_len); + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), mode_len, + iv.c_str(), iv_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), - decrypted_len)); + std::string(reinterpret_cast(decrypted_value), decrypted_len)); } } // namespace gandiva diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index dca4a97079ce..a81bee57226f 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ 
b/cpp/src/gandiva/llvm_generator.cc @@ -98,7 +98,7 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode // Compile and inject into the process' memory the generated function. ARROW_RETURN_NOT_OK(engine_->FinalizeModule()); - + // setup the jit functions for each expression. for (auto& compiled_expr : compiled_exprs_) { auto fn_name = compiled_expr->GetFunctionName(mode); @@ -419,11 +419,12 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, output_value->data(), output_value->length()}); } else if (output_type_id == arrow::Type::STRUCT) { - auto slot_offset = builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); - builder->CreateStore(output_value->data(), slot_offset); + auto slot_offset = + builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); + builder->CreateStore(output_value->data(), slot_offset); } else if (output_type_id == arrow::Type::LIST) { auto output_list_internal_type = output->Type()->field(0)->type()->id(); - + if (arrow::is_binary_like(output_list_internal_type)) { auto output_list_value = std::dynamic_pointer_cast(output_value); llvm::Value* child_output_offset_ref = GetChildOffsetsReference( @@ -434,21 +435,25 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, child_output_offset_ref, loop_var, output_list_value->data(), output_list_value->child_offsets(), output_list_value->offsets_length()}); } else if (output_list_internal_type == arrow::Type::INT32) { - AddFunctionCall("gdv_fn_populate_list_int32_t_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_int32_t_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), 
output_value->length(), output_value->validity()}); } else if (output_list_internal_type == arrow::Type::INT64) { - AddFunctionCall("gdv_fn_populate_list_int64_t_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_int64_t_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length(), output_value->validity()}); } else if (output_list_internal_type == arrow::Type::FLOAT) { - AddFunctionCall("gdv_fn_populate_list_float_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_float_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length(), output_value->validity()}); } else if (output_list_internal_type == arrow::Type::DOUBLE) { - AddFunctionCall("gdv_fn_populate_list_double_vector", types()->i32_type(), - {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, - loop_var, output_value->data(), output_value->length(), output_value->validity()}); + AddFunctionCall( + "gdv_fn_populate_list_double_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length(), output_value->validity()}); } else { return Status::NotImplemented("list internal type ", output->Type()->field(0)->type()->ToString(), @@ -551,7 +556,7 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr, /// /// 1. Do the intersection of input/local bitmaps to generate a temporary bitmap. /// 2. copy just the relevant bits from the temporary bitmap to the output bitmap. 
- + LocalBitMapsHolder bit_map_holder(eval_batch->num_records(), 1); uint8_t* temp_bitmap = bit_map_holder.GetLocalBitMap(0); accumulator.ComputeResult(temp_bitmap); @@ -680,7 +685,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { auto dt = dex.FieldType(); if (dt->id() == arrow::Type::LIST) { - type = types->IRType(dt->fields()[0]->type()->id() ); + type = types->IRType(dt->fields()[0]->type()->id()); } arrow::Type::type at32 = arrow::Type::INT32; @@ -713,15 +718,17 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { llvm::Value* data_list = builder->CreateGEP(type, slot_ref, slot_index); auto list_len_var = builder->CreateIntCast(list_len, types->i64_type(), true); - llvm::Value* vv_end = builder->CreateLoad(generator_->types()->i64_type(),validity_index_var_, "vv_end"); + llvm::Value* vv_end = + builder->CreateLoad(generator_->types()->i64_type(), validity_index_var_, "vv_end"); -llvm::Value* updated_validity_index_var = builder->CreateAdd( - vv_end, list_len_var, "validity_index_var+offset"); + llvm::Value* updated_validity_index_var = + builder->CreateAdd(vv_end, list_len_var, "validity_index_var+offset"); builder->CreateStore(updated_validity_index_var, validity_index_var_); llvm::Value* b_slot_index = builder->CreateAdd(loop_var_, GetSliceOffset(dex.ValidityIdx())); - llvm::Value* b_slot_ref = GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); + llvm::Value* b_slot_ref = + GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); llvm::Value* validity = builder->CreateGEP(type32, b_slot_ref, b_slot_index); std::string str3 = "validity:"; @@ -731,9 +738,10 @@ llvm::Value* updated_validity_index_var = builder->CreateAdd( } ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + " length %T", list_len); - ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + " updated_validity_index_var %T", + 
ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + + " updated_validity_index_var %T", updated_validity_index_var); - + result_.reset(new LValue(data_list, list_len, validity)); } @@ -804,7 +812,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { // => offset_start = offsets[loop_var] slot = builder->CreateGEP(type, offsets_slot_ref, offsets_slot_index); llvm::Value* offset_start = builder->CreateLoad(type, slot, "offset_start"); - + // => offset_end = offsets[loop_var + 1] llvm::Value* offsets_slot_index_next = builder->CreateAdd( offsets_slot_index, generator_->types()->i64_constant(1), "loop_var+1"); @@ -833,7 +841,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { llvm::Value* data_slot_ref = GetBufferReference(dex.DataIdx(), kBufferTypeData, dex.Field()); llvm::Value* data_value = builder->CreateGEP(type, data_slot_ref, child_offset_start); - + result_.reset(new ListLValue(data_value, child_offsets, list_data_length)); } @@ -1035,10 +1043,8 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { auto params = BuildParams(dex.get_holder_idx(), dex.args(), true, native_function->NeedsContext()); - - auto arrow_return_type = dex.func_descriptor()->return_type(); - + bool passLoopVars = false; for (auto& p : dex.func_descriptor()->params()) { if (p->id() == arrow::Type::LIST) { @@ -1046,10 +1052,10 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { break; } } - if (passLoopVars) - { + if (passLoopVars) { params.push_back(loop_var_); - auto valid_var = builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); + auto valid_var = + builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); params.push_back(valid_var); } @@ -1486,13 +1492,12 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, } if (arrow_return_type_id == arrow::Type::LIST) { - result_len_ptr = new 
llvm::AllocaInst(generator_->types()->i32_type(), 0, "result_len", entry_block_); params->push_back(result_len_ptr); has_arena_allocs_ = true; valid_ptr = new llvm::AllocaInst(generator_->types()->i32_ptr_type(), 0, - "valid_ptr", entry_block_); + "valid_ptr", entry_block_); params->push_back(valid_ptr); } @@ -1506,7 +1511,7 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, (result_len_ptr == nullptr) ? nullptr : builder->CreateLoad(result_len_ptr->getAllocatedType(), result_len_ptr); - auto validity = + auto validity = (valid_ptr == nullptr) ? nullptr : builder->CreateLoad(generator_->types()->i32_ptr_type(), valid_ptr); @@ -1550,7 +1555,6 @@ std::vector LLVMGenerator::Visitor::BuildParams( // append all the parameters corresponding to this LValue. result_ref.AppendFunctionParams(¶ms); - // build validity. if (with_validity) { llvm::Value* validity_expr = BuildCombinedValidity(pair->validity_exprs()); diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index 98e40667ece7..269c021f1344 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -46,7 +46,9 @@ class GANDIVA_EXPORT LLVMTypes { llvm::Type* i128_type() { return llvm::Type::getInt128Ty(context_); } - llvm::VectorType* list_type() { return llvm::ScalableVectorType::get(i8_type(), (unsigned int)0); } + llvm::VectorType* list_type() { + return llvm::ScalableVectorType::get(i8_type(), (unsigned int)0); + } llvm::StructType* i128_split_type() { // struct with high/low bits (see decimal_ops.cc:DecimalSplit) @@ -95,9 +97,7 @@ class GANDIVA_EXPORT LLVMTypes { return llvm::ConstantFP::get(float_type(), val); } - llvm::LLVMContext* get_context() { - return &context_; - } + llvm::LLVMContext* get_context() { return &context_; } llvm::Constant* double_constant(double val) { return llvm::ConstantFP::get(double_type(), val); @@ -121,7 +121,7 @@ class GANDIVA_EXPORT LLVMTypes { // offsets buffer is to separate data into list // not support 
nested list if (data_type->id() == arrow::Type::LIST) { - //Nested lists aren't supported yet. + // Nested lists aren't supported yet. if (data_type->field(0)->type()->id() == arrow::Type::LIST) { return NULL; } diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h index 04862dc9d18c..43ef29414614 100644 --- a/cpp/src/gandiva/lvalue.h +++ b/cpp/src/gandiva/lvalue.h @@ -53,7 +53,7 @@ class GANDIVA_EXPORT LValue { virtual std::string to_string() { std::string s = "Base LValue"; - + std::string str1 = "data:"; if (data_) { llvm::raw_string_ostream output1(str1); @@ -107,8 +107,7 @@ class GANDIVA_EXPORT ListLValue : public LValue { llvm::Value* validity = NULLPTR) : LValue(data, NULLPTR, validity), child_offsets_(child_offsets), - offsets_length_(offsets_length) { - } + offsets_length_(offsets_length) {} llvm::Value* child_offsets() { return child_offsets_; } diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index 1277ccff3399..c0a914c47617 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -19,7 +19,6 @@ #include - #include "gandiva/array_ops.h" #include "gandiva/gdv_function_stubs.h" diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index 64c283e9fe03..0979abc5f7cb 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -147,7 +147,7 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, ++idx; } ARROW_RETURN_NOT_OK( - llvm_generator_->Execute(batch, selection_vector, output_data_vecs)); + llvm_generator_->Execute(batch, selection_vector, output_data_vecs)); return Status::OK(); } @@ -197,15 +197,20 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, * Otherwise, child data offsets buffer length is data length + 1 * and offset data is int32_t, need use buffer->size()/4 - 1 */ - child_data_size = child_data->buffers[child_data_buffer_index]->size() / int_data_size - 1; + child_data_size = + 
child_data->buffers[child_data_buffer_index]->size() / int_data_size - 1; } else if (child_data->type->id() == arrow::Type::INT32) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / int_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / int_data_size; } else if (child_data->type->id() == arrow::Type::INT64) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / double_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / double_data_size; } else if (child_data->type->id() == arrow::Type::FLOAT) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / int_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / int_data_size; } else if (child_data->type->id() == arrow::Type::DOUBLE) { - child_data_size = child_data->buffers[child_data_buffer_index]->size() / double_data_size; + child_data_size = + child_data->buffers[child_data_buffer_index]->size() / double_data_size; } auto new_child_data = arrow::ArrayData::Make( child_data->type, child_data_size, child_data->buffers, child_data->offset); @@ -278,14 +283,16 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records, } buffers.push_back(std::move(data_buffer)); - ARROW_ASSIGN_OR_RAISE(auto data_valid_buffer, arrow::AllocateResizableBuffer(data_len, pool)); + ARROW_ASSIGN_OR_RAISE(auto data_valid_buffer, + arrow::AllocateResizableBuffer(data_len, pool)); if (type->id() == arrow::Type::LIST) { auto internal_type = type->field(0)->type(); ArrayDataPtr child_data; if (arrow::is_primitive(internal_type->id())) { - child_data = arrow::ArrayData::Make(internal_type, 0 /*initialize length*/, - {std::move(data_valid_buffer), std::move(buffers[2])}, 0); + child_data = arrow::ArrayData::Make( + internal_type, 0 /*initialize length*/, + {std::move(data_valid_buffer), std::move(buffers[2])}, 0); } if 
(arrow::is_binary_like(internal_type->id())) { child_data = arrow::ArrayData::Make( @@ -349,8 +356,7 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data, Status::Invalid("Data buffer too small for ", field.name())); } else if (type_id == arrow::Type::LIST) { return Status::OK(); - } - else { + } else { return Status::Invalid("Unsupported output data type " + field.type()->ToString()); } diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index da81e79e535c..c92394ead570 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -133,7 +133,6 @@ class GANDIVA_EXPORT Projector { Projector(std::unique_ptr llvm_generator, SchemaPtr schema, const FieldVector& output_fields, std::shared_ptr); - /// Validate that the ArrayData has sufficient capacity to accommodate 'num_records'. Status ValidateArrayDataCapacity(const arrow::ArrayData& array_data, const arrow::Field& field, int64_t num_records) const; diff --git a/cpp/src/gandiva/tests/list_test.cc b/cpp/src/gandiva/tests/list_test.cc index abc7b5d7091b..a76428bea740 100644 --- a/cpp/src/gandiva/tests/list_test.cc +++ b/cpp/src/gandiva/tests/list_test.cc @@ -83,10 +83,10 @@ void _build_list_array(const vector& values, const vector& l template void _build_list_array2(const vector& values, const vector& length, - const vector& validity, const vector& innerValidity, arrow::MemoryPool* pool, - ArrayPtr* array) { - return _build_list_array(values, length, validity, pool, array); - } + const vector& validity, const vector& innerValidity, + arrow::MemoryPool* pool, ArrayPtr* array) { + return _build_list_array(values, length, validity, pool, array); +} /* * expression: @@ -136,7 +136,7 @@ void _test_list_type_field_alias(DataTypePtr type, ArrayPtr array, TEST_F(TestList, TestArrayRemove) { // schema for input fields auto field_b = field("b", int32()); - + auto field_a = field("a", list(int32())); auto schema = arrow::schema({field_a, field_b}); @@ -147,7 +147,7 
@@ TEST_F(TestList, TestArrayRemove) { int num_records = 2; auto array_b = MakeArrowArrayInt32({42, 42}, {true, true}); - + ArrayPtr array_a; _build_list_array2( {10, 42, 30, 42, 70, 80}, @@ -202,7 +202,7 @@ auto bitmap_buffer2 = arrow::AllocateBuffer(size, pool_); auto offsets_buffer2 = arrow::AllocateBuffer(offsets_len, pool_); buffers2.push_back(*std::move(offsets_buffer2)); std::shared_ptr dt2 = std::make_shared(); - + auto array_data_child = arrow::ArrayData::Make(dt2, num_records2, buffers2, 0, 0); array_data_child->buffers = std::move(buffers2); @@ -214,7 +214,7 @@ auto array_data = arrow::ArrayData::Make(dt, num_records2, buffers, kids, 0, 0); array_data->buffers = std::move(buffers); outputs2.push_back(array_data); - + status = projector->Evaluate(*(in_batch.get()), outputs2); EXPECT_TRUE(status.ok()) << status.message(); arrow::ArrayData ad = *outputs2.at(0); @@ -243,7 +243,7 @@ for (auto& array_data : outputs2) { array_data = arrow::ArrayData::Make(array_data->type, array_data->length, array_data->buffers, {new_child_data}, array_data->null_count, array_data->offset); - + auto newArray = arrow::MakeArray(array_data); //arrow::ArraySpan sp(newArray); @@ -313,9 +313,9 @@ TEST_F(TestList, TestListInt32LiteralContains) { auto node2 = TreeExprBuilder::MakeLiteral(42); field_nodes.push_back(node2); - - auto func_node = TreeExprBuilder::MakeFunction("array_contains", field_nodes, res->type()); - auto expr = TreeExprBuilder::MakeExpression(func_node, res); + + auto func_node = TreeExprBuilder::MakeFunction("array_contains", field_nodes, +res->type()); auto expr = TreeExprBuilder::MakeExpression(func_node, res); //////// // Build a projector for the expressions. 
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index e0c2e5164a5f..3fbe80d4cc34 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -2823,11 +2823,10 @@ TEST_F(TestProjector, TestAesEncryptDecrypt) { const char* key_24_bytes = "12345678abcdefgh12345678"; const char* key_32_bytes = "12345678abcdefgh12345678abcdefgh"; - auto array_data = MakeArrowArrayUtf8({"abc", "some words", "to be encrypted"}, - {true, true, true}); + auto array_data = + MakeArrowArrayUtf8({"abc", "some words", "to be encrypted"}, {true, true, true}); auto array_key = - MakeArrowArrayUtf8({key_16_bytes, key_24_bytes, key_32_bytes}, - {true, true, true}); + MakeArrowArrayUtf8({key_16_bytes, key_24_bytes, key_32_bytes}, {true, true, true}); auto array_holder_en = MakeArrowArrayUtf8({"", "", ""}, {true, true, true}); From fb0d878f702b772a162c36bbfdcbef512eee6b08 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 23:58:26 -0600 Subject: [PATCH 03/18] Fix CMake formatting --- cpp/src/gandiva/tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt index 09428a870567..48aec99a114b 100644 --- a/cpp/src/gandiva/tests/CMakeLists.txt +++ b/cpp/src/gandiva/tests/CMakeLists.txt @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. 
- add_gandiva_test(projector-test SOURCES binary_test.cc From 0848921df45be37d73c1fb73469ea7cb06d0ae9c Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 23:59:20 -0600 Subject: [PATCH 04/18] Fix C++ comment alignment in array_ops.cc --- cpp/src/gandiva/array_ops.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/gandiva/array_ops.cc b/cpp/src/gandiva/array_ops.cc index cc6e9ef281a7..c05fe828f07c 100644 --- a/cpp/src/gandiva/array_ops.cc +++ b/cpp/src/gandiva/array_ops.cc @@ -223,8 +223,8 @@ arrow::Status ExportedArrayFunctions::AddMappings(Engine* engine) const { types->i32_type(), // int32_t value to check for types->i1_type(), // bool validity --Needed? types->i64_type(), // in loop var --Needed? - types->i64_type(), // in validity_index_var index into the valdity vector for the - // current row. + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. types->i1_ptr_type() // output validity for the row }; @@ -241,8 +241,8 @@ arrow::Status ExportedArrayFunctions::AddMappings(Engine* engine) const { types->i64_type(), // int32_t value to check for types->i1_type(), // bool validity --Needed? types->i64_type(), // in loop var --Needed? - types->i64_type(), // in validity_index_var index into the valdity vector for the - // current row. + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. types->i1_ptr_type() // output validity for the row }; @@ -259,8 +259,8 @@ arrow::Status ExportedArrayFunctions::AddMappings(Engine* engine) const { types->float_type(), // int32_t value to check for types->i1_type(), // bool validity --Needed? types->i64_type(), // in loop var --Needed? - types->i64_type(), // in validity_index_var index into the valdity vector for the - // current row. + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. 
types->i1_ptr_type() // output validity for the row }; @@ -277,8 +277,8 @@ arrow::Status ExportedArrayFunctions::AddMappings(Engine* engine) const { types->double_type(), // int32_t value to check for types->i1_type(), // bool validity --Needed? types->i64_type(), // in loop var --Needed? - types->i64_type(), // in validity_index_var index into the valdity vector for the - // current row. + types->i64_type(), // in validity_index_var index into the valdity vector for the + // current row. types->i1_ptr_type() // output validity for the row }; From 6d132dd6dda88854d8830bf56b43da1d145e2f95 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 26 Feb 2026 00:03:07 -0600 Subject: [PATCH 05/18] Upgrade GoogleTest from 1.11.0 to 1.17.0 for CMake 4.2+ compatibility --- cpp/thirdparty/versions.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 736c13d4522a..38555a924457 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -76,8 +76,8 @@ ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.22.0 ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a042c1aae0c2feee905acaa2a2dc9bf ARROW_GRPC_BUILD_VERSION=v1.46.3 ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 -ARROW_GTEST_BUILD_VERSION=1.11.0 -ARROW_GTEST_BUILD_SHA256_CHECKSUM=b4870bf121ff7795ba20d20bcdd8627b8e088f2d1dab299a031c1034eddc93d5 +ARROW_GTEST_BUILD_VERSION=1.17.0 +ARROW_GTEST_BUILD_SHA256_CHECKSUM=d5558cd419c8d46bdc958064cb97f963d1ea793866414c025906ec15033512ed ARROW_JEMALLOC_BUILD_VERSION=5.3.0 ARROW_JEMALLOC_BUILD_SHA256_CHECKSUM=2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa ARROW_LZ4_BUILD_VERSION=v1.10.0 From cbc88fcfea1b59d061abb080c16975f23b2f288b Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 26 Feb 2026 00:05:58 -0600 Subject: [PATCH 06/18] Upgrade actions/cache from v4.0.0 to v4.3.0 across all workflows --- 
.github/workflows/cpp.yml | 8 ++++---- .github/workflows/dev.yml | 2 +- .github/workflows/docs.yml | 2 +- .github/workflows/docs_light.yml | 2 +- .github/workflows/integration.yml | 2 +- .github/workflows/java.yml | 2 +- .github/workflows/java_jni.yml | 4 ++-- .github/workflows/js.yml | 4 ++-- .github/workflows/matlab.yml | 6 +++--- .github/workflows/python.yml | 4 ++-- .github/workflows/r.yml | 4 ++-- .github/workflows/r_nightly.yml | 2 +- .github/workflows/ruby.yml | 10 +++++----- 13 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 93bc723cd430..49177aa228a0 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -136,7 +136,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: ${{ matrix.image }}-${{ hashFiles('cpp/**') }} @@ -259,7 +259,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: cpp-ccache-macos-${{ matrix.macos-version }}-${{ hashFiles('cpp/**') }} @@ -351,7 +351,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: cpp-ccache-windows-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} @@ -441,7 +441,7 @@ jobs: shell: msys2 {0} run: ci/scripts/msys2_setup.sh cpp - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ccache key: cpp-ccache-${{ matrix.msystem_lower}}-${{ 
hashFiles('cpp/**') }} diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index d2436fe3c452..5d9ad9bab0a0 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -53,7 +53,7 @@ jobs: python -m pip install pre-commit pre-commit run --show-diff-on-failure --color=always - name: Cache pre-commit - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ~/.cache/pre-commit key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1219f7526f9f..6fc6fa28fdb2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -46,7 +46,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: debian-docs-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 7d540b7cecdc..a13e29ce0dd1 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -53,7 +53,7 @@ jobs: with: fetch-depth: 0 - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: conda-docs-${{ hashFiles('cpp/**') }} diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index af9a98ed437f..ea2dfe2c5710 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -90,7 +90,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: conda-${{ hashFiles('cpp/**') }} diff --git 
a/.github/workflows/java.yml b/.github/workflows/java.yml index 6c0cf0991168..462538cbcdfd 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -72,7 +72,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: maven-${{ hashFiles('java/**') }} diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 56aa1d099288..a59d87dbf5ab 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -66,7 +66,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} @@ -108,7 +108,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: maven-${{ hashFiles('java/**') }} diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index a51ad867aa70..2619b85d27e2 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -95,7 +95,7 @@ jobs: with: fetch-depth: 0 - name: Jest Cache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: js/.jest-cache key: js-jest-cache-${{ runner.os }}-${{ hashFiles('js/src/**/*.ts', 'js/test/**/*.ts', 'js/yarn.lock') }} @@ -126,7 +126,7 @@ jobs: with: fetch-depth: 0 - name: Jest Cache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: js/.jest-cache key: js-jest-cache-${{ runner.os }}-${{ hashFiles('js/src/**/*.ts', 
'js/test/**/*.ts', 'js/yarn.lock') }} diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 7d217b07ad7d..6d449ebc8f5c 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -81,7 +81,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: matlab-ccache-ubuntu-${{ hashFiles('cpp/**', 'matlab/**') }} @@ -129,7 +129,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: matlab-ccache-macos-${{ hashFiles('cpp/**', 'matlab/**') }} @@ -172,7 +172,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: | ${{ steps.ccache-info.outputs.cache-dir }} diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 84c8a6553b00..db0c5de49879 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -103,7 +103,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} @@ -200,7 +200,7 @@ jobs: shell: bash run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: 
python-ccache-macos-${{ matrix.macos-version }}-${{ hashFiles('cpp/**', 'python/**') }} diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 1897f332f750..f9cf790acd7a 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -138,7 +138,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker # As this key is identical on both matrix builds only one will be able to successfully cache, @@ -268,7 +268,7 @@ jobs: ci/scripts/ccache_setup.sh echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> $GITHUB_ENV - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ccache key: r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-${{ github.run_id }} diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 9817e41d3b61..33db17f0db0f 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -86,7 +86,7 @@ jobs: exit 1 fi - name: Cache Repo - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: repo key: r-nightly-${{ github.run_id }} diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 13da7e62ee0c..d1d56379af63 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -79,7 +79,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: .docker key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} @@ -168,7 +168,7 @@ jobs: run: | echo 
"cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: ruby-ccache-macos-${{ hashFiles('cpp/**') }} @@ -251,7 +251,7 @@ jobs: run: | ridk exec bash ci\scripts\msys2_setup.sh ruby - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ccache key: ruby-ccache-ucrt${{ matrix.mingw-n-bits }}-${{ hashFiles('cpp/**') }} @@ -275,7 +275,7 @@ jobs: Write-Output "gem-dir=$(ridk exec gem env gemdir)" | ` Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append - name: Cache RubyGems - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.rubygems-info.outputs.gem-dir }} key: ruby-rubygems-ucrt${{ matrix.mingw-n-bits }}-${{ hashFiles('**/Gemfile', 'ruby/*/*.gemspec') }} @@ -384,7 +384,7 @@ jobs: run: | echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT - name: Cache ccache - uses: actions/cache@v4 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ${{ steps.ccache-info.outputs.cache-dir }} key: glib-ccache-msvc-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} From 8de79dcec8a37c6c094bd6ceec0439da0ac150ad Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 26 Feb 2026 00:19:40 -0600 Subject: [PATCH 07/18] Fix C++ lint errors in Gandiva code - Remove redundant 'virtual' keyword from lvalue.h:123 (already has 'override') - Replace 'unsigned long' with 'auto' in encrypt_utils_common.cc to avoid C type lint warning --- cpp/src/gandiva/encrypt_utils_common.cc | 3 +-- cpp/src/gandiva/lvalue.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/gandiva/encrypt_utils_common.cc b/cpp/src/gandiva/encrypt_utils_common.cc index 6eab3e84ac62..73210037c0f1 100644 --- 
a/cpp/src/gandiva/encrypt_utils_common.cc +++ b/cpp/src/gandiva/encrypt_utils_common.cc @@ -24,11 +24,10 @@ namespace gandiva { std::string get_openssl_error_string() { std::string error_string; - unsigned long error_code; char error_buffer[256]; // Loop through all errors in the queue - while ((error_code = ERR_get_error()) != 0) { + for (auto error_code = ERR_get_error(); error_code != 0; error_code = ERR_get_error()) { if (!error_string.empty()) { error_string += "; "; } diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h index 43ef29414614..bc97b6b4073c 100644 --- a/cpp/src/gandiva/lvalue.h +++ b/cpp/src/gandiva/lvalue.h @@ -120,7 +120,7 @@ class GANDIVA_EXPORT ListLValue : public LValue { params->push_back(validity_); } - virtual std::string to_string() override { + std::string to_string() override { std::string s = "List LValue"; s += " " + LValue::to_string(); From 68663961f91b2d3a8c4efb3285b2c71c6bdbfb09 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 26 Feb 2026 00:23:10 -0600 Subject: [PATCH 08/18] Fix macOS runner configuration for Java JARs workflow Change from deprecated 'macos-13' to 'macos-13-large' for x86_64 builds. The 'macos-13-us-default' configuration is no longer supported by GitHub Actions. 
--- dev/tasks/java-jars/github.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index e1fa8506af00..57fe7454edd8 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -278,7 +278,7 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: macos-13, arch: "x86_64"} + - { runs_on: macos-13-large, arch: "x86_64"} needs: - build-cpp-ubuntu - build-cpp-macos From d315588453a068f5481495ff8bcc75b0b8f1d02e Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 26 Feb 2026 00:26:56 -0600 Subject: [PATCH 09/18] Fix macOS 13 runner configuration across all workflows Replace deprecated 'macos-13' with 'macos-13-large' for AMD64 builds in: - C++ workflow (.github/workflows/cpp.yml) - Python workflow (.github/workflows/python.yml) - MATLAB workflow (.github/workflows/matlab.yml) The 'macos-13-us-default' configuration is no longer supported by GitHub Actions. Using explicit runs-on matrix values to specify macos-13-large for x86_64 and macos-14 for ARM64. 
--- .github/workflows/cpp.yml | 4 +++- .github/workflows/matlab.yml | 4 +++- .github/workflows/python.yml | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 49177aa228a0..87baebea129f 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -191,7 +191,7 @@ jobs: macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} C++ - runs-on: macos-${{ matrix.macos-version }} + runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 75 strategy: @@ -200,8 +200,10 @@ jobs: include: - architecture: AMD64 macos-version: "13" + runs-on: macos-13-large - architecture: ARM64 macos-version: "14" + runs-on: macos-14 env: ARROW_AZURE: ON ARROW_BUILD_TESTS: ON diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 6d449ebc8f5c..8389c8fc8dc5 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -99,15 +99,17 @@ jobs: strict: true macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} MATLAB - runs-on: macos-${{ matrix.macos-version }} + runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} strategy: matrix: include: - architecture: AMD64 macos-version: "13" + runs-on: macos-13-large - architecture: ARM64 macos-version: "14" + runs-on: macos-14 steps: - name: Check out repository uses: actions/checkout@v4 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index db0c5de49879..b1e69db74fc3 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -135,7 +135,7 @@ jobs: macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 - runs-on: macos-${{ matrix.macos-version }} + runs-on: ${{ matrix.runs-on }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 strategy: @@ -144,8 +144,10 @@ jobs: include: - architecture: AMD64 
macos-version: "13" + runs-on: macos-13-large - architecture: ARM64 macos-version: "14" + runs-on: macos-14 env: ARROW_HOME: /tmp/local ARROW_AZURE: ON From b821f683a4c958bce6306386785137195d845c22 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 26 Feb 2026 00:28:43 -0600 Subject: [PATCH 10/18] Fix macOS runner to use macos-14-large instead of non-existent macos-13-large According to GitHub Actions runner-images documentation, macos-13 and macos-13-large do not exist. The available x86_64 (AMD64) runners are: - macos-14-large (oldest available) - macos-15-large - macos-26-large (newest) Updated all workflows to use macos-14-large for AMD64 builds: - C++ workflow (.github/workflows/cpp.yml) - Python workflow (.github/workflows/python.yml) - MATLAB workflow (.github/workflows/matlab.yml) - Java JARs workflow (dev/tasks/java-jars/github.yml) --- .github/workflows/cpp.yml | 4 ++-- .github/workflows/matlab.yml | 4 ++-- .github/workflows/python.yml | 4 ++-- dev/tasks/java-jars/github.yml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 87baebea129f..46ea95f8627d 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -199,8 +199,8 @@ jobs: matrix: include: - architecture: AMD64 - macos-version: "13" - runs-on: macos-13-large + macos-version: "14" + runs-on: macos-14-large - architecture: ARM64 macos-version: "14" runs-on: macos-14 diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 8389c8fc8dc5..e5d33be2e7e4 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -105,8 +105,8 @@ jobs: matrix: include: - architecture: AMD64 - macos-version: "13" - runs-on: macos-13-large + macos-version: "14" + runs-on: macos-14-large - architecture: ARM64 macos-version: "14" runs-on: macos-14 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index b1e69db74fc3..b513c0c17a06 100644 --- 
a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -143,8 +143,8 @@ jobs: matrix: include: - architecture: AMD64 - macos-version: "13" - runs-on: macos-13-large + macos-version: "14" + runs-on: macos-14-large - architecture: ARM64 macos-version: "14" runs-on: macos-14 diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 57fe7454edd8..4f3089ac3e5f 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -278,7 +278,7 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: macos-13-large, arch: "x86_64"} + - { runs_on: macos-14-large, arch: "x86_64"} needs: - build-cpp-ubuntu - build-cpp-macos From e36d0fbe69596442e31cfd9d5839cd8d57d2f08e Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 27 Feb 2026 09:48:18 -0600 Subject: [PATCH 11/18] Fix GoogleTest 1.17.0 download URL and checksum Update to match upstream Apache Arrow main branch: - Use correct URL format: /releases/download/v.../googletest-... instead of /archive/release-... - Use correct SHA256 checksum: 65fab701... (for googletest-1.17.0.tar.gz from releases) instead of d5558cd4... (for archive/release-1.17.0.tar.gz) This fixes the 404 error when downloading GoogleTest from Apache JFrog Artifactory, as the mirror expects the same file format as the upstream GitHub release. 
--- cpp/thirdparty/versions.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 38555a924457..2134841a5387 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -77,7 +77,7 @@ ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a0 ARROW_GRPC_BUILD_VERSION=v1.46.3 ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 ARROW_GTEST_BUILD_VERSION=1.17.0 -ARROW_GTEST_BUILD_SHA256_CHECKSUM=d5558cd419c8d46bdc958064cb97f963d1ea793866414c025906ec15033512ed +ARROW_GTEST_BUILD_SHA256_CHECKSUM=65fab701d9829d38cb77c14acdc431d2108bfdbf8979e40eb8ae567edf10b27c ARROW_JEMALLOC_BUILD_VERSION=5.3.0 ARROW_JEMALLOC_BUILD_SHA256_CHECKSUM=2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa ARROW_LZ4_BUILD_VERSION=v1.10.0 @@ -151,7 +151,7 @@ DEPENDENCIES=( "ARROW_GLOG_URL glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" "ARROW_GOOGLE_CLOUD_CPP_URL google-cloud-cpp-${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz https://github.com/googleapis/google-cloud-cpp/archive/${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz" "ARROW_GRPC_URL grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz" - "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/releases/download/v${ARROW_GTEST_BUILD_VERSION}/googletest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "ARROW_JEMALLOC_URL jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2 https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" "ARROW_LZ4_URL lz4-${ARROW_LZ4_BUILD_VERSION}.tar.gz 
https://github.com/lz4/lz4/archive/${ARROW_LZ4_BUILD_VERSION}.tar.gz" "ARROW_MIMALLOC_URL mimalloc-${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz https://github.com/microsoft/mimalloc/archive/${ARROW_MIMALLOC_BUILD_VERSION}.tar.gz" From 07653d31662cafacef2e8d1d99011943acc9ea92 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 6 Mar 2026 13:46:50 -0500 Subject: [PATCH 12/18] Fix GoogleTest URL in ThirdpartyToolchain.cmake The CMake build was using hardcoded old GoogleTest URL format in ThirdpartyToolchain.cmake, which overrode the fix in versions.txt. Changed from: - /archive/release-1.17.0.tar.gz (404 error) - Chromium mirror (also 404) To: - /releases/download/v1.17.0/googletest-1.17.0.tar.gz (works!) Also removed the Chromium mirror as it doesn't have GoogleTest 1.17.0. Tested locally on macOS ARM64 - build completes successfully. --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9dcf4d2c06f0..b3245cf9e70f 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -697,8 +697,7 @@ if(DEFINED ENV{ARROW_GTEST_URL}) set(GTEST_SOURCE_URL "$ENV{ARROW_GTEST_URL}") else() set_urls(GTEST_SOURCE_URL - "https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" - "https://chromium.googlesource.com/external/github.com/google/googletest/+archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" + "https://github.com/google/googletest/releases/download/v${ARROW_GTEST_BUILD_VERSION}/googletest-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "${THIRDPARTY_MIRROR_URL}/gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz") endif() From 90f3ab27b0bf3e49bb8aecc99b9060e0db01efb9 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Fri, 6 Mar 2026 19:10:15 -0500 Subject: [PATCH 13/18] Fix LLVM version detection to handle missing Homebrew packages The FindLLVMAlt.cmake script was 
failing when trying to find old LLVM versions (llvm@13, llvm@11, llvm@10, llvm@9, llvm@8, llvm@7) that no longer exist in Homebrew. The 'brew --prefix llvm@XX' command would error out and cause CMake configuration to fail. Changes: - Added ERROR_QUIET to execute_process() to suppress brew errors - Added check to only append LLVM_BREW_PREFIX if it's not empty This allows CMake to gracefully try multiple LLVM versions and use the first one that exists (e.g., llvm@14, llvm@15, etc.). --- cpp/cmake_modules/FindLLVMAlt.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index 69f680824b08..139984bc92b6 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -50,8 +50,11 @@ if(NOT LLVM_FOUND) "${ARROW_LLVM_VERSION}") execute_process(COMMAND ${BREW} --prefix "llvm@${ARROW_LLVM_VERSION_MAJOR}" OUTPUT_VARIABLE LLVM_BREW_PREFIX - OUTPUT_STRIP_TRAILING_WHITESPACE) - list(APPEND LLVM_HINTS ${LLVM_BREW_PREFIX}) + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + if(LLVM_BREW_PREFIX) + list(APPEND LLVM_HINTS ${LLVM_BREW_PREFIX}) + endif() endif() endif() From bfa2d471cd5aa9db8882bbe1c1b9d326fe0ab100 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Tue, 10 Mar 2026 22:39:45 -0400 Subject: [PATCH 14/18] llvm 21 overlay (cherry-picked from c794101960) --- .env | 2 +- .../llvm/0001-fix-install-package-dir.patch | 13 + .../llvm/0002-fix-tools-install-dir.patch | 205 ++++++++ .../overlay/llvm/0003-fix-llvm-config.patch | 16 + .../llvm/0004-disable-libomp-aliases.patch | 32 ++ ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch | 23 + ...06-create-destination-mlir-directory.patch | 16 + ci/vcpkg/overlay/llvm/clang_usage | 5 + ci/vcpkg/overlay/llvm/cmake4.patch | 117 +++++ ci/vcpkg/overlay/llvm/flang_usage | 5 + ci/vcpkg/overlay/llvm/lld_usage | 5 + ci/vcpkg/overlay/llvm/llvm_usage | 15 + ci/vcpkg/overlay/llvm/mlir_usage | 5 + 
ci/vcpkg/overlay/llvm/portfile.cmake | 367 +++++++++++++ ci/vcpkg/overlay/llvm/vcpkg.json | 488 ++++++++++++++++++ ci/vcpkg/overlay/symengine/portfile.cmake | 62 +++ ci/vcpkg/overlay/symengine/vcpkg.json | 73 +++ ci/vcpkg/vcpkg.json | 1 + 18 files changed, 1449 insertions(+), 1 deletion(-) create mode 100644 ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch create mode 100644 ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch create mode 100644 ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch create mode 100644 ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch create mode 100644 ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch create mode 100644 ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch create mode 100644 ci/vcpkg/overlay/llvm/clang_usage create mode 100644 ci/vcpkg/overlay/llvm/cmake4.patch create mode 100644 ci/vcpkg/overlay/llvm/flang_usage create mode 100644 ci/vcpkg/overlay/llvm/lld_usage create mode 100644 ci/vcpkg/overlay/llvm/llvm_usage create mode 100644 ci/vcpkg/overlay/llvm/mlir_usage create mode 100644 ci/vcpkg/overlay/llvm/portfile.cmake create mode 100644 ci/vcpkg/overlay/llvm/vcpkg.json create mode 100644 ci/vcpkg/overlay/symengine/portfile.cmake create mode 100644 ci/vcpkg/overlay/symengine/vcpkg.json diff --git a/.env b/.env index 215aa49109b4..1231d0788739 100644 --- a/.env +++ b/.env @@ -62,7 +62,7 @@ HDFS=3.2.1 JDK=11 KARTOTHEK=latest # LLVM 12 and GCC 11 reports -Wmismatched-new-delete. 
-LLVM=14 +LLVM=21 MAVEN=3.8.7 NODE=18 NUMBA=latest diff --git a/ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch b/ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch new file mode 100644 index 000000000000..6bbe1af08f89 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0001-fix-install-package-dir.patch @@ -0,0 +1,13 @@ + openmp/tools/Modules/CMakeLists.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/openmp/tools/Modules/CMakeLists.txt b/openmp/tools/Modules/CMakeLists.txt +index 22d818eea72d..75aacc4468d4 100644 +--- a/openmp/tools/Modules/CMakeLists.txt ++++ b/openmp/tools/Modules/CMakeLists.txt +@@ -12,4 +12,4 @@ + + + install(FILES "FindOpenMPTarget.cmake" +- DESTINATION "${OPENMP_INSTALL_LIBDIR}/cmake/openmp") ++ DESTINATION "share/openmp") diff --git a/ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch b/ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch new file mode 100644 index 000000000000..bf46382567a3 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0002-fix-tools-install-dir.patch @@ -0,0 +1,205 @@ + bolt/cmake/modules/AddBOLT.cmake | 2 +- + clang-tools-extra/clang-tidy/tool/CMakeLists.txt | 2 +- + clang-tools-extra/modularize/CMakeLists.txt | 2 +- + clang/cmake/modules/AddClang.cmake | 4 ++-- + clang/tools/c-index-test/CMakeLists.txt | 2 +- + clang/tools/clang-format/CMakeLists.txt | 4 ++-- + clang/tools/scan-build-py/CMakeLists.txt | 4 ++-- + clang/tools/scan-build/CMakeLists.txt | 2 +- + clang/tools/scan-view/CMakeLists.txt | 2 +- + flang/cmake/modules/AddFlang.cmake | 2 +- + flang/tools/flang-driver/CMakeLists.txt | 2 +- + lld/cmake/modules/AddLLD.cmake | 4 ++-- + lldb/cmake/modules/AddLLDB.cmake | 2 +- + 13 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/bolt/cmake/modules/AddBOLT.cmake b/bolt/cmake/modules/AddBOLT.cmake +index c7ac662c6b12..f5a7e7c01c66 100644 +--- a/bolt/cmake/modules/AddBOLT.cmake ++++ b/bolt/cmake/modules/AddBOLT.cmake +@@ -16,7 +16,7 @@ macro(add_bolt_tool name) + 
get_target_export_arg(${name} BOLT export_to_bolttargets) + install(TARGETS ${name} + ${export_to_bolttargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${BOLT_TOOLS_INSTALL_DIR}" + COMPONENT bolt) + + if(NOT LLVM_ENABLE_IDE) +diff --git a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +index 0d4501d1eac0..a6ff0261f5f3 100644 +--- a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt ++++ b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +@@ -66,6 +66,6 @@ install(PROGRAMS clang-tidy-diff.py + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" + COMPONENT clang-tidy) + install(PROGRAMS run-clang-tidy.py +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-tidy + RENAME run-clang-tidy) +diff --git a/clang-tools-extra/modularize/CMakeLists.txt b/clang-tools-extra/modularize/CMakeLists.txt +index eb5383c3ad44..39a34dfe8c71 100644 +--- a/clang-tools-extra/modularize/CMakeLists.txt ++++ b/clang-tools-extra/modularize/CMakeLists.txt +@@ -27,5 +27,5 @@ clang_target_link_libraries(modularize + ) + + install(TARGETS modularize +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-extras) +diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake +index 4059fc3e986c..2dc34826ba1e 100644 +--- a/clang/cmake/modules/AddClang.cmake ++++ b/clang/cmake/modules/AddClang.cmake +@@ -183,11 +183,11 @@ macro(add_clang_tool name) + get_target_export_arg(${name} Clang export_to_clangtargets) + install(TARGETS ${name} + ${export_to_clangtargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT ${name}) + + if (LLVM_ENABLE_PDB) +- install(FILES $ DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name} OPTIONAL) ++ install(FILES $ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" COMPONENT ${name} OPTIONAL) + endif() + + 
if(NOT LLVM_ENABLE_IDE) +diff --git a/clang/tools/c-index-test/CMakeLists.txt b/clang/tools/c-index-test/CMakeLists.txt +index 24e7c9692ca5..841f49cd5e0b 100644 +--- a/clang/tools/c-index-test/CMakeLists.txt ++++ b/clang/tools/c-index-test/CMakeLists.txt +@@ -48,7 +48,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + set_property(TARGET c-index-test APPEND PROPERTY INSTALL_RPATH + "@executable_path/../../lib") + else() +- set(INSTALL_DESTINATION "${CMAKE_INSTALL_BINDIR}") ++ set(INSTALL_DESTINATION "${CLANG_TOOLS_INSTALL_DIR}") + endif() + + install(TARGETS c-index-test +diff --git a/clang/tools/clang-format/CMakeLists.txt b/clang/tools/clang-format/CMakeLists.txt +index 1c61a3c8fb80..41f019c1fbf9 100644 +--- a/clang/tools/clang-format/CMakeLists.txt ++++ b/clang/tools/clang-format/CMakeLists.txt +@@ -36,11 +36,11 @@ install(FILES clang-format.py + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" + COMPONENT clang-format) + install(PROGRAMS git-clang-format +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-format) + + if (WIN32 AND NOT CYGWIN) + install(PROGRAMS git-clang-format.bat +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT clang-format) + endif() +diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt +index 9273eb5ed977..f9abcb2ca248 100644 +--- a/clang/tools/scan-build-py/CMakeLists.txt ++++ b/clang/tools/scan-build-py/CMakeLists.txt +@@ -43,7 +43,7 @@ foreach(BinFile ${BinFiles}) + ${CMAKE_BINARY_DIR}/bin/scan-build-py + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/scan-build) + install (PROGRAMS "bin/scan-build" +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + RENAME scan-build-py + COMPONENT scan-build-py) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/scan-build-py) +@@ -56,7 +56,7 @@ foreach(BinFile ${BinFiles}) + ${CMAKE_BINARY_DIR}/bin/ + DEPENDS 
${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + install(PROGRAMS bin/${BinFile} +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT scan-build-py) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + endif() +diff --git a/clang/tools/scan-build/CMakeLists.txt b/clang/tools/scan-build/CMakeLists.txt +index ef687b0e90a1..47f31efc9174 100644 +--- a/clang/tools/scan-build/CMakeLists.txt ++++ b/clang/tools/scan-build/CMakeLists.txt +@@ -47,7 +47,7 @@ if(CLANG_INSTALL_SCANBUILD) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + install(PROGRAMS bin/${BinFile} +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT scan-build) + endforeach() + +diff --git a/clang/tools/scan-view/CMakeLists.txt b/clang/tools/scan-view/CMakeLists.txt +index 07aec76ee66f..55a945bb278d 100644 +--- a/clang/tools/scan-view/CMakeLists.txt ++++ b/clang/tools/scan-view/CMakeLists.txt +@@ -20,7 +20,7 @@ if(CLANG_INSTALL_SCANVIEW) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + install(PROGRAMS bin/${BinFile} +- DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + COMPONENT scan-view) + endforeach() + +diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake +index ca233103ccdb..e079f33d9426 100644 +--- a/flang/cmake/modules/AddFlang.cmake ++++ b/flang/cmake/modules/AddFlang.cmake +@@ -122,7 +122,7 @@ macro(add_flang_tool name) + get_target_export_arg(${name} Flang export_to_flangtargets) + install(TARGETS ${name} + ${export_to_flangtargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${FLANG_TOOLS_INSTALL_DIR}" + COMPONENT ${name}) + + if(NOT LLVM_ENABLE_IDE) +diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt +index b5d672702512..67f5d4304dac 100644 
+--- a/flang/tools/flang-driver/CMakeLists.txt ++++ b/flang/tools/flang-driver/CMakeLists.txt +@@ -43,7 +43,7 @@ if(FLANG_PLUGIN_SUPPORT) + export_executable_symbols_for_plugins(flang) + endif() + +-install(TARGETS flang DESTINATION "${CMAKE_INSTALL_BINDIR}") ++install(TARGETS flang DESTINATION "${FLANG_TOOLS_INSTALL_DIR}") + + # Keep "flang-new" as a symlink for backwards compatiblity. Remove once "flang" + # is a widely adopted name. +diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake +index 37f73afa915f..7df335698aab 100644 +--- a/lld/cmake/modules/AddLLD.cmake ++++ b/lld/cmake/modules/AddLLD.cmake +@@ -55,11 +55,11 @@ macro(add_lld_tool name) + get_target_export_arg(${name} LLD export_to_lldtargets) + install(TARGETS ${name} + ${export_to_lldtargets} +- RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ RUNTIME DESTINATION "${LLD_TOOLS_INSTALL_DIR}" + COMPONENT ${name}) + + if (LLVM_ENABLE_PDB) +- install(FILES $ DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name} OPTIONAL) ++ install(FILES $ DESTINATION "${LLD_TOOLS_INSTALL_DIR}" COMPONENT ${name} OPTIONAL) + endif() + + if(NOT CMAKE_CONFIGURATION_TYPES) +diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake +index 28bf8d816d89..372eec626160 100644 +--- a/lldb/cmake/modules/AddLLDB.cmake ++++ b/lldb/cmake/modules/AddLLDB.cmake +@@ -184,7 +184,7 @@ function(add_lldb_executable name) + endif() + + if(ARG_GENERATE_INSTALL) +- set(install_dest bin) ++ set(install_dest "${LLVM_TOOLS_INSTALL_DIR}") + if(ARG_INSTALL_PREFIX) + set(install_dest ${ARG_INSTALL_PREFIX}) + endif() diff --git a/ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch b/ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch new file mode 100644 index 000000000000..568cb17cbb05 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0003-fix-llvm-config.patch @@ -0,0 +1,16 @@ + llvm/tools/llvm-config/llvm-config.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git 
a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp +index d5b76b1bb6c1..9fedcb2ab75f 100644 +--- a/llvm/tools/llvm-config/llvm-config.cpp ++++ b/llvm/tools/llvm-config/llvm-config.cpp +@@ -304,7 +304,7 @@ int main(int argc, char **argv) { + // bin dir). + sys::fs::make_absolute(CurrentPath); + CurrentExecPrefix = +- sys::path::parent_path(sys::path::parent_path(CurrentPath)).str(); ++ sys::path::parent_path(sys::path::parent_path(sys::path::parent_path(CurrentPath))).str(); + + // Check to see if we are inside a development tree by comparing to possible + // locations (prefix style or CMake style). diff --git a/ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch b/ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch new file mode 100644 index 000000000000..6736f2a31d53 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0004-disable-libomp-aliases.patch @@ -0,0 +1,32 @@ + openmp/runtime/src/CMakeLists.txt | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt +index bb5822264514..340cef14df89 100644 +--- a/openmp/runtime/src/CMakeLists.txt ++++ b/openmp/runtime/src/CMakeLists.txt +@@ -215,7 +215,7 @@ endif() + set(LIBOMP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) + + # Add symbolic links to libomp +-if(NOT WIN32) ++if(0) + add_custom_command(TARGET omp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink ${LIBOMP_LIB_FILE} + libgomp${LIBOMP_LIBRARY_SUFFIX} +@@ -367,6 +367,7 @@ if(WIN32) + install(TARGETS omp ${export_to_llvmexports} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + install(TARGETS ${LIBOMP_IMP_LIB_TARGET} ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") + # Create aliases (regular copies) of the library for backwards compatibility ++ if(0) + set(LIBOMP_ALIASES "libiomp5md") + foreach(alias IN LISTS LIBOMP_ALIASES) + install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_LIB_FILE}\" +@@ -375,6 
+376,7 @@ if(WIN32) + install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_IMP_LIB_FILE}\" + \"${alias}${CMAKE_STATIC_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"${outdir}\")") + endforeach() ++ endif() + else() + + install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}") diff --git a/ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch b/ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch new file mode 100644 index 000000000000..0047333171ef --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0005-fix-runtimes.patch @@ -0,0 +1,23 @@ + llvm/runtimes/CMakeLists.txt | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt +index 94a43b96d218..fff91366fbb2 100644 +--- a/llvm/runtimes/CMakeLists.txt ++++ b/llvm/runtimes/CMakeLists.txt +@@ -504,11 +504,13 @@ if(build_runtimes) + # Forward user-provived system configuration to runtimes for requirement introspection. + # CMAKE_PREFIX_PATH is the search path for CMake packages. + if(CMAKE_PREFIX_PATH) +- list(APPEND extra_cmake_args "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}") ++ string(REPLACE ";" "|" new_value "${CMAKE_PREFIX_PATH}") ++ list(APPEND extra_cmake_args "-DCMAKE_PREFIX_PATH=${new_value}") + endif() + # CMAKE_PROGRAM_PATH is the search path for executables such as python. + if(CMAKE_PROGRAM_PATH) +- list(APPEND extra_cmake_args "-DCMAKE_PROGRAM_PATH=${CMAKE_PROGRAM_PATH}") ++ string(REPLACE ";" "|" new_value "${CMAKE_PROGRAM_PATH}") ++ list(APPEND extra_cmake_args "-DCMAKE_PROGRAM_PATH=${new_value}") + endif() + + # TODO: We need to consider passing it as '-DRUNTIMES_x86_64_LLVM_ENABLE_RUNTIMES'. 
diff --git a/ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch b/ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch new file mode 100644 index 000000000000..4950a48c3b5e --- /dev/null +++ b/ci/vcpkg/overlay/llvm/0006-create-destination-mlir-directory.patch @@ -0,0 +1,16 @@ + mlir/python/CMakeLists.txt | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt +index 50fbca38a08f..957a6722c21c 100644 +--- a/mlir/python/CMakeLists.txt ++++ b/mlir/python/CMakeLists.txt +@@ -527,6 +527,8 @@ add_mlir_python_common_capi_library(MLIRPythonCAPI + ${_ADDL_TEST_SOURCES} + ) + ++file(MAKE_DIRECTORY "${MLIR_BINARY_DIR}/python_packages/mlir_core/mlir/_mlir_libs") ++ + ################################################################################ + # Custom targets. + ################################################################################ diff --git a/ci/vcpkg/overlay/llvm/clang_usage b/ci/vcpkg/overlay/llvm/clang_usage new file mode 100644 index 000000000000..f239721f484f --- /dev/null +++ b/ci/vcpkg/overlay/llvm/clang_usage @@ -0,0 +1,5 @@ +The package clang provides CMake targets: + + find_package(Clang CONFIG REQUIRED) + target_include_directories(main PRIVATE ${CLANG_INCLUDE_DIRS}) + target_link_libraries(main PRIVATE clangBasic clangLex clangParse clangAST ...) 
diff --git a/ci/vcpkg/overlay/llvm/cmake4.patch b/ci/vcpkg/overlay/llvm/cmake4.patch new file mode 100644 index 000000000000..984981e88f8b --- /dev/null +++ b/ci/vcpkg/overlay/llvm/cmake4.patch @@ -0,0 +1,117 @@ +diff --git a/cmake/Modules/HandleCompilerRT.cmake b/cmake/Modules/HandleCompilerRT.cmake +index 6865f45175..33dda44d18 100644 +--- a/cmake/Modules/HandleCompilerRT.cmake ++++ b/cmake/Modules/HandleCompilerRT.cmake +@@ -20,25 +20,25 @@ function(get_component_name name variable) + if(NOT name MATCHES "builtins.*") + set(component_name "${name}_") + endif() +- if (CMAKE_OSX_SYSROOT MATCHES ".+MacOSX.+") ++ if (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+MacOSX.+") + set(component_name "${component_name}osx") + +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+iPhoneOS.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+iPhoneOS.+") + set(component_name "${component_name}ios") +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+iPhoneSimulator.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+iPhoneSimulator.+") + set(component_name "${component_name}iossim") + +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+AppleTVOS.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+AppleTVOS.+") + set(component_name "${component_name}tvos") +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+AppleTVSimulator.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+AppleTVSimulator.+") + set(component_name "${component_name}tvossim") + +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+WatchOS.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+WatchOS.+") + set(component_name "${component_name}watchos") +- elseif (CMAKE_OSX_SYSROOT MATCHES ".+WatchSimulator.+") ++ elseif (_CMAKE_OSX_SYSROOT_PATH MATCHES ".+WatchSimulator.+") + set(component_name "${component_name}watchossim") + else() +- message(WARNING "Unknown Apple SDK ${CMAKE_OSX_SYSROOT}, we don't know which compiler-rt library suffix to use.") ++ message(WARNING "Unknown Apple SDK ${_CMAKE_OSX_SYSROOT_PATH}, we don't know which compiler-rt library suffix to use.") + endif() + else() + 
set(component_name "${name}") +diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt +index f4722c3b35..a553615ebc 100644 +--- a/libcxxabi/src/CMakeLists.txt ++++ b/libcxxabi/src/CMakeLists.txt +@@ -134,7 +134,7 @@ if ( APPLE ) + # Make sure we link in CrashReporterClient if we find it -- it's used by + # abort() on Apple platforms when building the system dylib. + find_library(CrashReporterClient NAMES libCrashReporterClient.a +- PATHS "${CMAKE_OSX_SYSROOT}/usr/local/lib") ++ PATHS "${_CMAKE_OSX_SYSROOT_PATH}/usr/local/lib") + if (CrashReporterClient) + message(STATUS "Linking with CrashReporterClient at ${CrashReporterClient}") + add_library_flags("${CrashReporterClient}") +diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt +index f0b9756bec..0c784545c4 100644 +--- a/lldb/tools/debugserver/source/CMakeLists.txt ++++ b/lldb/tools/debugserver/source/CMakeLists.txt +@@ -125,17 +125,17 @@ if(APPLE) + set(LIBCOMPRESSION compression) + if(APPLE_EMBEDDED) + find_library(BACKBOARD_LIBRARY BackBoardServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(FRONTBOARD_LIBRARY FrontBoardServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(SPRINGBOARD_LIBRARY SpringBoardServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(MOBILESERVICES_LIBRARY MobileCoreServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + find_library(LOCKDOWN_LIBRARY lockdown) + if (APPLE_EMBEDDED STREQUAL "watchos") + find_library(CAROUSELSERVICES_LIBRARY CarouselServices +- PATHS ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks) ++ 
PATHS ${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks) + endif() + + if(NOT BACKBOARD_LIBRARY) +@@ -158,7 +158,7 @@ endif() + + add_definitions(-DLLDB_USE_OS_LOG) + +-if(${CMAKE_OSX_SYSROOT} MATCHES ".Internal.sdk$") ++if(${_CMAKE_OSX_SYSROOT_PATH} MATCHES ".Internal.sdk$") + message(STATUS "LLDB debugserver energy support is enabled") + add_definitions(-DLLDB_ENERGY) + set(ENERGY_LIBRARY -lpmenergy -lpmsample) +@@ -181,7 +181,7 @@ endif() + separate_arguments(MIG_ARCH_FLAGS_SEPARTED NATIVE_COMMAND "${MIG_ARCH_FLAGS}") + + add_custom_command(OUTPUT ${generated_mach_interfaces} +- VERBATIM COMMAND mig ${MIG_ARCH_FLAGS_SEPARTED} -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs ++ VERBATIM COMMAND mig ${MIG_ARCH_FLAGS_SEPARTED} -isysroot ${_CMAKE_OSX_SYSROOT_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs + ) + +@@ -305,7 +305,7 @@ if(APPLE_EMBEDDED) + WITH_BKS + ) + set_property(TARGET lldbDebugserverCommon APPEND PROPERTY COMPILE_FLAGS +- -F${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks ++ -F${_CMAKE_OSX_SYSROOT_PATH}/System/Library/PrivateFrameworks + ) + + add_lldb_library(lldbDebugserverCommon_NonUI ${lldbDebugserverCommonSources}) +diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt +index 6127b76db0..0a810db531 100644 +--- a/llvm/test/CMakeLists.txt ++++ b/llvm/test/CMakeLists.txt +@@ -33,6 +33,7 @@ configure_lit_site_cfg( + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + PATHS ++ "_CMAKE_OSX_SYSROOT_PATH" + "CMAKE_OSX_SYSROOT" + "LLVM_SOURCE_DIR" + "LLVM_BINARY_DIR" diff --git a/ci/vcpkg/overlay/llvm/flang_usage b/ci/vcpkg/overlay/llvm/flang_usage new file mode 100644 index 000000000000..b3cb08592f03 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/flang_usage @@ -0,0 +1,5 @@ +The package flang provides CMake targets: + + find_package(Flang CONFIG REQUIRED) + target_include_directories(main PRIVATE ${FLANG_INCLUDE_DIRS}) + 
target_link_libraries(main PRIVATE flangFrontend flangFrontendTool ...) diff --git a/ci/vcpkg/overlay/llvm/lld_usage b/ci/vcpkg/overlay/llvm/lld_usage new file mode 100644 index 000000000000..92c8d10a0857 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/lld_usage @@ -0,0 +1,5 @@ +The package lld provides CMake targets: + + find_package(LLD CONFIG REQUIRED) + target_include_directories(main PRIVATE ${LLD_INCLUDE_DIRS}) + target_link_libraries(main PRIVATE lldCommon lldCore lldDriver ...) diff --git a/ci/vcpkg/overlay/llvm/llvm_usage b/ci/vcpkg/overlay/llvm/llvm_usage new file mode 100644 index 000000000000..72d57dbfa573 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/llvm_usage @@ -0,0 +1,15 @@ +The package llvm provides CMake targets: + + find_package(LLVM CONFIG REQUIRED) + + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") + include(HandleLLVMOptions) + add_definitions(${LLVM_DEFINITIONS}) + + target_include_directories(main PRIVATE ${LLVM_INCLUDE_DIRS}) + + # Find the libraries that correspond to the LLVM components that we wish to use + llvm_map_components_to_libnames(llvm_libs Support Core IRReader ...) + + # Link against LLVM libraries + target_link_libraries(main PRIVATE ${llvm_libs}) diff --git a/ci/vcpkg/overlay/llvm/mlir_usage b/ci/vcpkg/overlay/llvm/mlir_usage new file mode 100644 index 000000000000..da45a1612ec8 --- /dev/null +++ b/ci/vcpkg/overlay/llvm/mlir_usage @@ -0,0 +1,5 @@ +The package mlir provides CMake targets: + + find_package(MLIR CONFIG REQUIRED) + target_include_directories(main PRIVATE ${MLIR_INCLUDE_DIRS}) + target_link_libraries(main PRIVATE MLIRIR MLIRParser MLIRPass MLIRSupport ...)
diff --git a/ci/vcpkg/overlay/llvm/portfile.cmake b/ci/vcpkg/overlay/llvm/portfile.cmake new file mode 100644 index 000000000000..b6d5bdacd02c --- /dev/null +++ b/ci/vcpkg/overlay/llvm/portfile.cmake @@ -0,0 +1,367 @@ +# Suppress warning: There should be no installed empty directories +set(VCPKG_POLICY_ALLOW_EMPTY_FOLDERS enabled) + +vcpkg_check_linkage(ONLY_STATIC_LIBRARY) + +# [BOLT] Allow to compile with MSVC (#151189) +vcpkg_download_distfile( + PATCH1_FILE + URLS https://github.com/llvm/llvm-project/commit/497d17737518d417f6411d46aef1334f642ccd81.patch?full_index=1 + SHA512 7bf4d4ee8f72fea5b8094320d1f3a71063ec19fe1b552424182c4140055bf6aacfa9ff64b0bcab0a8d6739e4b6249641f58d19fb6b35e1ada67b66b53776dc1a + FILENAME 497d17737518d417f6411d46aef1334f642ccd81.patch +) + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO llvm/llvm-project + REF "llvmorg-${VERSION}" + SHA512 85d272761253428b648f3d111b7308f8cdee74cceebec9e709126c4555ad1e78c443183ad8eb7319e0a15bafb97868ab5b5a3d86ba64812750c568dbf715d8ec + HEAD_REF main + PATCHES + 0001-fix-install-package-dir.patch + 0002-fix-tools-install-dir.patch + 0003-fix-llvm-config.patch + 0004-disable-libomp-aliases.patch + 0005-fix-runtimes.patch + 0006-create-destination-mlir-directory.patch + "${PATCH1_FILE}" +) + +vcpkg_check_features( + OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + tools LLVM_BUILD_TOOLS + tools LLVM_INCLUDE_TOOLS + utils LLVM_BUILD_UTILS + utils LLVM_INCLUDE_UTILS + utils LLVM_INSTALL_UTILS + enable-assertions LLVM_ENABLE_ASSERTIONS + enable-rtti LLVM_ENABLE_RTTI + enable-ffi LLVM_ENABLE_FFI + enable-eh LLVM_ENABLE_EH + enable-bindings LLVM_ENABLE_BINDINGS + export-symbols LLVM_EXPORT_SYMBOLS_FOR_PLUGINS +) + +vcpkg_cmake_get_vars(cmake_vars_file) +include("${cmake_vars_file}") + +# LLVM generates CMake error due to Visual Studio version 16.4 is known to miscompile part of LLVM. +# LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON disables this error. 
+# See https://developercommunity.visualstudio.com/content/problem/845933/miscompile-boolean-condition-deduced-to-be-always.html +# and thread "[llvm-dev] Longstanding failing tests - clang-tidy, MachO, Polly" on llvm-dev Jan 21-23 2020. +if(VCPKG_DETECTED_CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND VCPKG_DETECTED_MSVC_VERSION LESS "1925") + list(APPEND FEATURE_OPTIONS + -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON + ) +endif() + +# Force enable or disable external libraries +set(llvm_external_libraries + zlib + libxml2 + zstd +) +foreach(external_library IN LISTS llvm_external_libraries) + string(TOLOWER "enable-${external_library}" feature_name) + string(TOUPPER "LLVM_ENABLE_${external_library}" define_name) + if(feature_name IN_LIST FEATURES) + list(APPEND FEATURE_OPTIONS + -D${define_name}=FORCE_ON + ) + else() + list(APPEND FEATURE_OPTIONS + -D${define_name}=OFF + ) + endif() +endforeach() + +# LLVM_ABI_BREAKING_CHECKS can be WITH_ASSERTS (default), FORCE_ON or FORCE_OFF. +# By default in LLVM, abi-breaking checks are enabled if assertions are enabled. +# however, this breaks linking with the debug versions, since the option is +# baked into the header files; thus, we always turn off LLVM_ABI_BREAKING_CHECKS +# unless the user asks for it +if("enable-abi-breaking-checks" IN_LIST FEATURES) + # Force enable abi-breaking checks. + list(APPEND FEATURE_OPTIONS + -DLLVM_ABI_BREAKING_CHECKS=FORCE_ON + ) +else() + # Force disable abi-breaking checks. 
+ list(APPEND FEATURE_OPTIONS + -DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF + ) +endif() + +# All projects: bolt;clang;clang-tools-extra;lld;lldb;mlir;polly +# Extra projects: flang +set(LLVM_ENABLE_PROJECTS) +if("bolt" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "bolt") + list(APPEND FEATURE_OPTIONS + -DBOLT_TOOLS_INSTALL_DIR:PATH=tools/llvm + ) +endif() +if("clang" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "clang") + vcpkg_check_features( + OUT_FEATURE_OPTIONS CLANG_FEATURE_OPTIONS + FEATURES + clang-enable-cir CLANG_ENABLE_CIR + clang-enable-static-analyzer CLANG_ENABLE_STATIC_ANALYZER + ) + string(REGEX MATCH "^[0-9]+" CLANG_VERSION_MAJOR ${VERSION}) + list(APPEND CLANG_FEATURE_OPTIONS + -DCLANG_INSTALL_PACKAGE_DIR:PATH=share/clang + -DCLANG_TOOLS_INSTALL_DIR:PATH=tools/llvm + # 1) LLVM/Clang tools are relocated from ./bin/ to ./tools/llvm/ (CLANG_TOOLS_INSTALL_DIR=tools/llvm) + # 2) Clang resource files should be relocated from lib/clang/ to ../tools/llvm/lib/clang/ + -DCLANG_RESOURCE_DIR=lib/clang/${CLANG_VERSION_MAJOR} + ) +endif() +if("clang-tools-extra" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "clang-tools-extra") +endif() +if("flang" IN_LIST FEATURES) + if(VCPKG_DETECTED_CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND VCPKG_TARGET_ARCHITECTURE STREQUAL "x86") + message(FATAL_ERROR "Building Flang with MSVC is not supported on x86. 
Disable it until issues are fixed.") + endif() + list(APPEND LLVM_ENABLE_PROJECTS "flang") + list(APPEND FEATURE_OPTIONS + -DFLANG_INSTALL_PACKAGE_DIR:PATH=share/flang + -DFLANG_TOOLS_INSTALL_DIR:PATH=tools/llvm + ) + list(APPEND FEATURE_OPTIONS + # Flang requires C++17 + -DCMAKE_CXX_STANDARD=17 + ) +endif() +if("lld" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "lld") + list(APPEND FEATURE_OPTIONS + -DLLD_INSTALL_PACKAGE_DIR:PATH=share/lld + -DLLD_TOOLS_INSTALL_DIR:PATH=tools/llvm + ) +endif() +if("lldb" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "lldb") + list(APPEND FEATURE_OPTIONS + -DLLDB_ENABLE_CURSES=OFF + ) +endif() +if("mlir" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "mlir") + list(APPEND FEATURE_OPTIONS + -DMLIR_INSTALL_PACKAGE_DIR:PATH=share/mlir + -DMLIR_TOOLS_INSTALL_DIR:PATH=tools/llvm + -DMLIR_INSTALL_AGGREGATE_OBJECTS=OFF # Disables installation of object files in lib/objects-{CMAKE_BUILD_TYPE}. + ) + if("enable-mlir-python-bindings" IN_LIST FEATURES) + list(APPEND FEATURE_OPTIONS + -DMLIR_ENABLE_BINDINGS_PYTHON=ON + "-Dpybind11_DIR=${CURRENT_INSTALLED_DIR}/share/pybind11" + ) + endif() +endif() +if("polly" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_PROJECTS "polly") + list(APPEND FEATURE_OPTIONS + -DPOLLY_INSTALL_PACKAGE_DIR:PATH=share/polly + ) +endif() + +# Supported runtimes: libc;libclc;libcxx;libcxxabi;libunwind;compiler-rt;openmp;llvm-libgcc;offload;flang-rt +set(LLVM_ENABLE_RUNTIMES) +if("libc" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libc") +endif() +if("libclc" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libclc") +endif() +if("libcxx" IN_LIST FEATURES) + if(VCPKG_DETECTED_CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND VCPKG_DETECTED_MSVC_VERSION LESS "1914") + # libcxx supports being built with clang-cl, but not with MSVC’s cl.exe, as cl doesn’t support the #include_next extension. + # Furthermore, VS 2017 or newer (19.14) is required. 
+ # More info: https://releases.llvm.org/17.0.1/projects/libcxx/docs/BuildingLibcxx.html#support-for-windows + message(FATAL_ERROR "libcxx requiries MSVC 19.14 or newer.") + endif() + list(APPEND LLVM_ENABLE_RUNTIMES "libcxx") +endif() +if("libcxxabi" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libcxxabi") +endif() +if("libunwind" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "libunwind") +endif() +if("compiler-rt" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "compiler-rt") + vcpkg_check_features( + OUT_FEATURE_OPTIONS COMPILER_RT_FEATURE_OPTIONS + FEATURES + enable-ios COMPILER_RT_ENABLE_IOS + ) +endif() +if("openmp" IN_LIST FEATURES) + list(APPEND LLVM_ENABLE_RUNTIMES "openmp") +endif() + +# this is for normal targets +set(known_llvm_targets + AArch64 + AMDGPU + ARM + AVR + BPF + Hexagon + Lanai + LoongArch + Mips + MSP430 + NVPTX + PowerPC + RISCV + Sparc + SPIRV + SystemZ + VE + WebAssembly + X86 + XCore +) + +set(LLVM_TARGETS_TO_BUILD) +foreach(llvm_target IN LISTS known_llvm_targets) + string(TOLOWER "target-${llvm_target}" feature_name) + if(feature_name IN_LIST FEATURES) + list(APPEND LLVM_TARGETS_TO_BUILD "${llvm_target}") + endif() +endforeach() + +# this is for experimental targets +set(known_llvm_experimental_targets + ARC + CSKY + DirectX + M68k + Xtensa +) + +set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD) +foreach(llvm_target IN LISTS known_llvm_experimental_targets) + string(TOLOWER "target-${llvm_target}" feature_name) + if(feature_name IN_LIST FEATURES) + list(APPEND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "${llvm_target}") + endif() +endforeach() + +vcpkg_find_acquire_program(PYTHON3) +get_filename_component(PYTHON3_DIR ${PYTHON3} DIRECTORY) +vcpkg_add_to_path(PREPEND "${PYTHON3_DIR}") + +file(REMOVE "${SOURCE_PATH}/llvm/cmake/modules/Findzstd.cmake") + +if("${LLVM_ENABLE_RUNTIMES}" STREQUAL "") + list(APPEND FEATURE_OPTIONS + -DLLVM_INCLUDE_RUNTIMES=OFF + -DLLVM_BUILD_RUNTIMES=OFF + -DLLVM_BUILD_RUNTIME=OFF + ) +endif() + +# At least 
one target must be specified, otherwise default to "all".
+if("${LLVM_TARGETS_TO_BUILD}" STREQUAL "")
+    set(LLVM_TARGETS_TO_BUILD "all")
+endif()
+
+vcpkg_cmake_configure(
+    SOURCE_PATH "${SOURCE_PATH}/llvm"
+    OPTIONS
+        -DLLVM_INCLUDE_EXAMPLES=OFF
+        -DLLVM_BUILD_EXAMPLES=OFF
+        -DLLVM_INCLUDE_TESTS=OFF
+        -DLLVM_BUILD_TESTS=OFF
+        -DLLVM_INCLUDE_BENCHMARKS=OFF
+        -DLLVM_BUILD_BENCHMARKS=OFF
+        # Force TableGen to be built with optimization. This significantly improves build time.
+        -DLLVM_OPTIMIZED_TABLEGEN=ON
+        -DPACKAGE_VERSION=${VERSION}
+        # Limit the number of concurrent link jobs to 1 to avoid running out of memory while linking.
+        -DLLVM_PARALLEL_LINK_JOBS=1
+        -DLLVM_INSTALL_PACKAGE_DIR:PATH=share/llvm
+        -DLLVM_TOOLS_INSTALL_DIR:PATH=tools/llvm
+        "-DLLVM_ENABLE_PROJECTS=${LLVM_ENABLE_PROJECTS}"
+        "-DLLVM_ENABLE_RUNTIMES=${LLVM_ENABLE_RUNTIMES}"
+        "-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}"
+        "-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}"
+        ${FEATURE_OPTIONS}
+        ${CLANG_FEATURE_OPTIONS}
+        ${COMPILER_RT_FEATURE_OPTIONS}
+)
+
+vcpkg_cmake_install(ADD_BIN_TO_PATH)
+# llvm_cmake_package_config_fixup(<package_name> [FEATURE_NAME <f>] [CONFIG_PATH <p>] [DO_NOT_DELETE_PARENT_CONFIG_PATH]): fix up the package's CMake config and install its license (plus an optional `<package_name>_usage` file); a no-op unless FEATURE_NAME (default: <package_name>) is the port itself or an enabled feature.
+function(llvm_cmake_package_config_fixup package_name)
+    cmake_parse_arguments(PARSE_ARGV 1 "arg" "DO_NOT_DELETE_PARENT_CONFIG_PATH" "FEATURE_NAME;CONFIG_PATH" "") # PARSE_ARGV keeps empty and list-valued arguments intact
+    if(NOT DEFINED arg_FEATURE_NAME)
+        set(arg_FEATURE_NAME "${package_name}")
+    endif()
+    if("${arg_FEATURE_NAME}" STREQUAL "${PORT}" OR "${arg_FEATURE_NAME}" IN_LIST FEATURES)
+        set(args) # arguments forwarded to vcpkg_cmake_config_fixup
+        list(APPEND args PACKAGE_NAME "${package_name}")
+        if(arg_DO_NOT_DELETE_PARENT_CONFIG_PATH)
+            list(APPEND args "DO_NOT_DELETE_PARENT_CONFIG_PATH")
+        endif()
+        if(arg_CONFIG_PATH)
+            list(APPEND args "CONFIG_PATH" "${arg_CONFIG_PATH}")
+        endif()
+        vcpkg_cmake_config_fixup(${args}) # intentionally unquoted: each list element is a separate argument
+        file(INSTALL "${SOURCE_PATH}/${arg_FEATURE_NAME}/LICENSE.TXT" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${package_name}" RENAME copyright)
+        if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/${package_name}_usage")
+            file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/${package_name}_usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${package_name}" RENAME usage)
+        endif()
+    endif()
+endfunction()
+
+llvm_cmake_package_config_fixup("clang" DO_NOT_DELETE_PARENT_CONFIG_PATH)
+llvm_cmake_package_config_fixup("flang" DO_NOT_DELETE_PARENT_CONFIG_PATH)
+llvm_cmake_package_config_fixup("lld" DO_NOT_DELETE_PARENT_CONFIG_PATH)
+llvm_cmake_package_config_fixup("mlir" DO_NOT_DELETE_PARENT_CONFIG_PATH)
+llvm_cmake_package_config_fixup("polly" DO_NOT_DELETE_PARENT_CONFIG_PATH)
+llvm_cmake_package_config_fixup("llvm")
+
+if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig")
+    file(RENAME "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig" "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig")
+endif()
+if(EXISTS "${CURRENT_PACKAGES_DIR}/share/pkgconfig")
+    file(RENAME "${CURRENT_PACKAGES_DIR}/share/pkgconfig" "${CURRENT_PACKAGES_DIR}/lib/pkgconfig")
+endif()
+vcpkg_fixup_pkgconfig()
+
+vcpkg_copy_tool_dependencies("${CURRENT_PACKAGES_DIR}/tools/${PORT}")
+
+# Move Clang's runtime libraries from bin/lib to tools/${PORT}/lib
+if(EXISTS "${CURRENT_PACKAGES_DIR}/bin/lib")
+    file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}")
+    file(RENAME "${CURRENT_PACKAGES_DIR}/bin/lib" "${CURRENT_PACKAGES_DIR}/tools/${PORT}/lib")
+endif()
+if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/bin/lib")
+    file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/bin/lib")
+endif()
+
+# Remove debug headers and tools
+if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug")
+    file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include"
+        "${CURRENT_PACKAGES_DIR}/debug/share"
+        "${CURRENT_PACKAGES_DIR}/debug/tools"
+    )
+endif()
+
+# LLVM generates shared libraries in a static build (LLVM-C.dll, libclang.dll, LTO.dll, Remarks.dll, ...)
+# for the corresponding export targets (used in LLVMExports-.cmake files on the Windows platform) +if(VCPKG_TARGET_IS_WINDOWS) + set(VCPKG_POLICY_DLLS_IN_STATIC_LIBRARY enabled) +else() + file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/bin" + "${CURRENT_PACKAGES_DIR}/debug/bin" + ) +endif() \ No newline at end of file diff --git a/ci/vcpkg/overlay/llvm/vcpkg.json b/ci/vcpkg/overlay/llvm/vcpkg.json new file mode 100644 index 000000000000..e94c2335de0a --- /dev/null +++ b/ci/vcpkg/overlay/llvm/vcpkg.json @@ -0,0 +1,488 @@ +{ + "name": "llvm", + "version": "21.1.1", + "description": "The LLVM Compiler Infrastructure.", + "homepage": "https://llvm.org", + "license": "Apache-2.0 WITH LLVM-exception", + "supports": "!uwp & !(arm & windows)", + "dependencies": [ + { + "name": "atl", + "platform": "windows & !mingw" + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + }, + { + "name": "vcpkg-cmake-get-vars", + "host": true + } + ], + "default-features": [ + "clang", + "default-targets", + "enable-bindings", + "enable-zlib", + "enable-zstd", + "lld", + "tools" + ], + "features": { + "bolt": { + "description": "BOLT is a post-link optimizer developed to speed up large applications.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "clang": { + "description": "Include C Language Family Front-end.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "clang-enable-cir": { + "description": "Include ClangIR.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "mlir" + ] + } + ] + }, + "clang-enable-static-analyzer": { + "description": "Include static analyzer in Clang binary.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang" + ] + } + ] + }, + "clang-tools-extra": { + "description": "Include Clang 
tools.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang" + ] + } + ] + }, + "compiler-rt": { + "description": "Include compiler's runtime libraries.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang" + ] + } + ] + }, + "default-targets": { + "description": "Build with platform-specific default targets.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "target-aarch64" + ], + "platform": "arm64" + }, + { + "name": "llvm", + "default-features": false, + "features": [ + "target-x86" + ], + "platform": "x86 | x64" + }, + { + "name": "llvm", + "default-features": false, + "features": [ + "target-arm" + ], + "platform": "arm & !arm64" + } + ] + }, + "enable-abi-breaking-checks": { + "description": "Build LLVM with LLVM_ABI_BREAKING_CHECKS=FORCE_ON." + }, + "enable-assertions": { + "description": "Build LLVM with assertions." + }, + "enable-bindings": { + "description": "Build bindings." + }, + "enable-eh": { + "description": "Build LLVM with exception handler.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "enable-rtti" + ] + } + ] + }, + "enable-ffi": { + "description": "Build LLVM with FFI.", + "dependencies": [ + "libffi" + ] + }, + "enable-ios": { + "description": "Build compiler-rt for iOS SDK.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "target-arm" + ] + } + ] + }, + "enable-libxml2": { + "description": "Build with LibXml2.", + "dependencies": [ + "libxml2" + ] + }, + "enable-mlir-python-bindings": { + "description": "Build MLIR Python bindings.", + "supports": "!(windows & static)", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "mlir" + ] + }, + "pybind11", + "python3" + ] + }, + "enable-rtti": { + "description": "Build LLVM with run-time type information." 
+ }, + "enable-zlib": { + "description": "Build with ZLib.", + "dependencies": [ + "zlib" + ] + }, + "enable-zstd": { + "description": "Build with zstd.", + "dependencies": [ + "zstd" + ] + }, + "export-symbols": { + "description": "Export symbols for plugins." + }, + "flang": { + "description": "Include Fortran front end.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "mlir", + "tools" + ] + } + ] + }, + "libc": { + "description": "Include libc library.", + "supports": "linux", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "tools" + ] + } + ] + }, + "libclc": { + "description": "Include OpenCL library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "tools" + ] + } + ] + }, + "libcxx": { + "description": "Include libcxx library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "libcxxabi", + "tools" + ] + } + ] + }, + "libcxxabi": { + "description": "Include libcxxabi library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "libcxx", + "tools" + ] + } + ] + }, + "libunwind": { + "description": "Include libunwind library.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "lld": { + "description": "Include LLVM linker.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + }, + "lldb": { + "description": "Include LLVM debugger.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "tools" + ] + } + ] + }, + "mlir": { + "description": "Include MLIR (Multi-Level IR Compiler Framework) project.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools", + "utils" + ] + } + ] + }, + "openmp": { 
+ "description": "Include LLVM OpenMP libraries.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "clang", + "utils" + ] + } + ] + }, + "polly": { + "description": "Include Polly (Polyhedral optimizations for LLVM) project.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools", + "utils" + ] + } + ] + }, + "target-aarch64": { + "description": "Build with AArch64 backend." + }, + "target-all": { + "description": "Build with all backends.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "target-aarch64", + "target-amdgpu", + "target-arc", + "target-arm", + "target-avr", + "target-bpf", + "target-csky", + "target-directx", + "target-hexagon", + "target-lanai", + "target-loongarch", + "target-m68k", + "target-mips", + "target-msp430", + "target-nvptx", + "target-powerpc", + "target-riscv", + "target-sparc", + "target-spirv", + "target-systemz", + "target-ve", + "target-webassembly", + "target-x86", + "target-xcore", + "target-xtensa" + ] + } + ] + }, + "target-amdgpu": { + "description": "Build with AMDGPU backend." + }, + "target-arc": { + "description": "Build with ARC backend (experimental)." + }, + "target-arm": { + "description": "Build with ARM backend." + }, + "target-avr": { + "description": "Build with AVR backend." + }, + "target-bpf": { + "description": "Build with BPF backend." + }, + "target-csky": { + "description": "Build with CSKY backend (experimental)." + }, + "target-directx": { + "description": "Build with DirectX backend (experimental)." + }, + "target-hexagon": { + "description": "Build with Hexagon backend." + }, + "target-lanai": { + "description": "Build with Lanai backend." + }, + "target-loongarch": { + "description": "Build with LoongArch backend." + }, + "target-m68k": { + "description": "Build with M68k backend (experimental)." + }, + "target-mips": { + "description": "Build with Mips backend." 
+ }, + "target-msp430": { + "description": "Build with MSP430 backend." + }, + "target-nvptx": { + "description": "Build with NVPTX backend." + }, + "target-powerpc": { + "description": "Build with PowerPC backend." + }, + "target-riscv": { + "description": "Build with RISC-V backend." + }, + "target-sparc": { + "description": "Build with Sparc backend." + }, + "target-spirv": { + "description": "Build with SPIRV backend." + }, + "target-systemz": { + "description": "Build with SystemZ backend." + }, + "target-ve": { + "description": "Build with VE backend." + }, + "target-webassembly": { + "description": "Build with WebAssembly backend." + }, + "target-x86": { + "description": "Build with X86 backend." + }, + "target-xcore": { + "description": "Build with XCore backend." + }, + "target-xtensa": { + "description": "Build with Xtensa backend (experimental)." + }, + "tools": { + "description": "Build LLVM tools." + }, + "utils": { + "description": "Build LLVM utils.", + "dependencies": [ + { + "name": "llvm", + "default-features": false, + "features": [ + "tools" + ] + } + ] + } + } +} diff --git a/ci/vcpkg/overlay/symengine/portfile.cmake b/ci/vcpkg/overlay/symengine/portfile.cmake new file mode 100644 index 000000000000..8bb494ec611a --- /dev/null +++ b/ci/vcpkg/overlay/symengine/portfile.cmake @@ -0,0 +1,62 @@ +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO symengine/symengine + REF "v${VERSION}" + SHA512 2b6012ed65064ff81c8828032c5a3148340582274e3604db2a43797ddbaa191520ed97da41efc2e842ba4a25326f53becc51f1e98935e8c34625bc5eaac8397f + HEAD_REF master +) + +vcpkg_check_features( + OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + arb WITH_ARB + flint WITH_FLINT + mpfr WITH_MPFR + tcmalloc WITH_TCMALLOC + llvm WITH_LLVM +) + +if(integer-class-flint IN_LIST FEATURES) + set(INTEGER_CLASS flint) +endif() + +if(VCPKG_TARGET_IS_UWP) + set(VCPKG_C_FLAGS "${VCPKG_C_FLAGS} -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE") + set(VCPKG_CXX_FLAGS 
"${VCPKG_CXX_FLAGS} -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE") +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DINTEGER_CLASS=${INTEGER_CLASS} + -DBUILD_BENCHMARKS=no + -DBUILD_TESTS=no + -DMSVC_WARNING_LEVEL=3 + -DMSVC_USE_MT=no + -DWITH_SYMENGINE_RCP=yes + -DWITH_SYMENGINE_TEUCHOS=no + -DWITH_SYMENGINE_THREAD_SAFE=yes + ${FEATURE_OPTIONS} +) + +vcpkg_cmake_install() + +if(EXISTS "${CURRENT_PACKAGES_DIR}/CMake") + vcpkg_cmake_config_fixup(CONFIG_PATH CMake) +elseif(EXISTS "${CURRENT_PACKAGES_DIR}/lib/cmake/${PORT}") + vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/${PORT}) +endif() + +vcpkg_copy_pdbs() + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE "${CURRENT_PACKAGES_DIR}/include/symengine/symengine_config_cling.h") + +vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/share/symengine/SymEngineConfig.cmake" "${CURRENT_BUILDTREES_DIR}" "") # not used, inside if (False) +vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/share/symengine/SymEngineConfig.cmake" + [[${SYMENGINE_CMAKE_DIR}/../../../include]] + [[${SYMENGINE_CMAKE_DIR}/../../include]] + IGNORE_UNCHANGED +) + +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/ci/vcpkg/overlay/symengine/vcpkg.json b/ci/vcpkg/overlay/symengine/vcpkg.json new file mode 100644 index 000000000000..702d5fd06f37 --- /dev/null +++ b/ci/vcpkg/overlay/symengine/vcpkg.json @@ -0,0 +1,73 @@ +{ + "name": "symengine", + "version": "0.14.0", + "description": "SymEngine is a fast symbolic manipulation library", + "homepage": "https://github.com/symengine/symengine", + "license": "BSD-3-Clause", + "supports": "!uwp", + "dependencies": [ + "boost-math", + "boost-random", + { + "name": "symengine", + "default-features": false, + "features": [ + "integer-class-flint" + ] + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "default-features": [ + "arb", + "llvm", + "mpfr" + ], + "features": { 
+ "arb": { + "description": "Build with arb", + "dependencies": [ + "arb" + ] + }, + "flint": { + "description": "Build with flint", + "dependencies": [ + "flint" + ] + }, + "integer-class-flint": { + "description": "Use flint integer class", + "dependencies": [ + "flint" + ] + }, + "llvm": { + "description": "Build with LLVM", + "dependencies": [ + { + "name": "llvm", + "default-features": false + } + ] + }, + "mpfr": { + "description": "Build with mpfr", + "dependencies": [ + "mpfr" + ] + }, + "tcmalloc": { + "description": "Build with tcmalloc", + "dependencies": [ + "gperftools" + ] + } + } +} diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 58b1382d1ca8..26479bdb8c13 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -78,6 +78,7 @@ { "name": "llvm", "default-features": false, + "version>=": "21.1.1", "features": [ "clang", "default-targets", From e5ecf3cb36e752e11ad4a73b207476b146181f35 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Mon, 6 Oct 2025 13:54:06 -0700 Subject: [PATCH 15/18] add enable-terminfo to vcpkg overlay --- ci/vcpkg/overlay/llvm/vcpkg.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/vcpkg/overlay/llvm/vcpkg.json b/ci/vcpkg/overlay/llvm/vcpkg.json index e94c2335de0a..df3f3bb2b053 100644 --- a/ci/vcpkg/overlay/llvm/vcpkg.json +++ b/ci/vcpkg/overlay/llvm/vcpkg.json @@ -25,6 +25,7 @@ ], "default-features": [ "clang", + "enable-terminfo", "default-targets", "enable-bindings", "enable-zlib", @@ -204,6 +205,9 @@ "zlib" ] }, + "enable-terminfo": { + "description": "Use terminfo database if available." 
+ }, "enable-zstd": { "description": "Build with zstd.", "dependencies": [ From 5d7e239fba55046ebd17773bd6edb530c9619b89 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 11 Mar 2026 11:49:57 -0400 Subject: [PATCH 16/18] Fix macOS build to use vcpkg-installed LLVM Add LLVM path detection logic that: - Determines vcpkg triplet based on architecture - Sets LLVM_DIR to point to vcpkg-installed LLVM when VCPKG_ROOT_LOCAL is set - Configures CMAKE_OSX_SYSROOT for proper SDK detection - Sets ARROW_GANDIVA_PC_CXX_FLAGS for C++ headers - Uses vcpkg's RE2 for ABI compatibility with LLVM/Abseil This matches the build script from dremio-arrow-java. --- ci/scripts/java_jni_macos_build.sh | 56 +++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 4ecc029bdd3c..407ad3efb1ab 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -63,6 +63,56 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE +# Determine vcpkg triplet based on architecture +vcpkg_arch=$(arch) +case ${vcpkg_arch} in + arm64) + vcpkg_triplet="arm64-osx" + ;; + i386|x86_64) + vcpkg_triplet="x64-osx" + ;; + *) + vcpkg_triplet="arm64-osx" + ;; +esac + +# Set LLVM_DIR to point to vcpkg-installed LLVM if VCPKG_ROOT_LOCAL is set +llvm_dir_arg="" +gandiva_cxx_flags="" +osx_sysroot_arg="" +re2_source_arg="-Dre2_SOURCE=BUNDLED" +if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then + vcpkg_installed="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}" + llvm_cmake_dir="${vcpkg_installed}/share/llvm" + if [ -d "${llvm_cmake_dir}" ]; then + echo "=== Found vcpkg LLVM at ${llvm_cmake_dir} ===" + llvm_dir_arg="-DLLVM_DIR=${llvm_cmake_dir}" + + # vcpkg's clang needs to know where to find system headers + # Arrow's GandivaAddBitcode.cmake uses CMAKE_OSX_SYSROOT to set SDKROOT env 
var + sdk_path="$(xcrun --show-sdk-path)" + if [ -d "${sdk_path}" ]; then + osx_sysroot_arg="-DCMAKE_OSX_SYSROOT=${sdk_path}" + fi + + # Also pass the C++ standard library include path via ARROW_GANDIVA_PC_CXX_FLAGS + xcode_path="$(xcode-select -p)" + cxx_include_path="${xcode_path}/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1" + if [ -d "${cxx_include_path}" ]; then + gandiva_cxx_flags="-DARROW_GANDIVA_PC_CXX_FLAGS=-stdlib=libc++;-isystem;${cxx_include_path}" + fi + + # Use vcpkg's RE2 since it's installed as a dependency of LLVM + # This ensures ABI compatibility - vcpkg's RE2 uses std::string_view API + # which matches what vcpkg's LLVM and Abseil expect + re2_cmake_dir="${vcpkg_installed}/share/re2" + if [ -d "${re2_cmake_dir}" ]; then + re2_source_arg="-Dre2_ROOT=${vcpkg_installed}" + fi + fi +fi + mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" @@ -81,14 +131,18 @@ cmake \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_S3=${ARROW_S3} \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ + -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_PREFIX=${install_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DGTest_SOURCE=BUNDLED \ + ${llvm_dir_arg} \ + ${osx_sysroot_arg} \ + ${gandiva_cxx_flags} \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ - -Dre2_SOURCE=BUNDLED \ + ${re2_source_arg} \ -GNinja \ ${arrow_dir}/cpp cmake --build . --target install From 9a691ec20bd8fbf6c92ab0c6223b369cf7fc55e1 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 11 Mar 2026 20:55:58 -0400 Subject: [PATCH 17/18] Add LLVM 21.1 to supported versions list vcpkg installs LLVM 21.1.1 via the overlay, but the ARROW_LLVM_VERSIONS list only went up to 19.1, causing CMake's FindLLVMAlt.cmake to reject the installed LLVM as incompatible. Use 21.1 (not just 21) to match CMake's version compatibility rules which require MAJOR.MINOR format for proper matching. 
--- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6c0c1323645e..2471bbfa2ff7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -160,6 +160,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}") set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") set(ARROW_LLVM_VERSIONS + "21.1" "19.1" "18.1" "17.0" From 1f3608d076af33fb78f59da04ae1d229b768f4a3 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Thu, 12 Mar 2026 10:34:50 -0400 Subject: [PATCH 18/18] Force bundled Boost to avoid conflict with Homebrew's partial install The macOS CI runner has Homebrew's Boost 1.90.0 installed, but it's missing the 'system' component. This causes Arrow's ThirdpartyToolchain to detect the partial installation and then attempt to build Boost from source as a fallback. However, this creates a conflict because some Boost targets (like Boost::filesystem) were already imported from the system Boost before the fallback was triggered. Setting Boost_SOURCE=BUNDLED tells Arrow to ignore the system Boost entirely and build all components from source, avoiding the conflict. --- ci/scripts/java_jni_macos_build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 407ad3efb1ab..7fcca77b2951 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -132,6 +132,7 @@ cmake \ -DARROW_S3=${ARROW_S3} \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ -DAWSSDK_SOURCE=BUNDLED \ + -DBoost_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_PREFIX=${install_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \