From 608723be48b249558805011c697d29497a002a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 16 Jun 2026 17:07:54 +0200 Subject: [PATCH 1/4] GH-50194: [C++] Move S3 and AWS-SDK to its own libarrow_s3.so and add required ARROW_EXPORTs --- ci/docker/conda-cpp.dockerfile | 1 - ci/scripts/cpp_build.sh | 2 - cpp/cmake_modules/DefineOptions.cmake | 8 +--- cpp/src/arrow/ArrowS3Config.cmake.in | 38 +++++++++++++++ cpp/src/arrow/CMakeLists.txt | 48 ++++++++++++------- cpp/src/arrow/arrow-s3.pc.in | 28 +++++++++++ cpp/src/arrow/filesystem/CMakeLists.txt | 13 +++--- cpp/src/arrow/filesystem/filesystem.h | 1 + cpp/src/arrow/filesystem/s3_visibility.h | 49 ++++++++++++++++++++ cpp/src/arrow/filesystem/s3fs.h | 25 +++++----- cpp/src/arrow/filesystem/s3fs_module_test.cc | 7 +-- cpp/src/arrow/filesystem/util_internal.h | 2 + python/CMakeLists.txt | 12 +++++ 13 files changed, 182 insertions(+), 52 deletions(-) create mode 100644 cpp/src/arrow/ArrowS3Config.cmake.in create mode 100644 cpp/src/arrow/arrow-s3.pc.in create mode 100644 cpp/src/arrow/filesystem/s3_visibility.h diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 474c18a9ade5..151c8b1d52db 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -73,7 +73,6 @@ ENV ARROW_ACERO=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ - ARROW_S3_MODULE=ON \ ARROW_SUBSTRAIT=ON \ ARROW_USE_CCACHE=ON \ ARROW_USE_MOLD=ON \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 3d9b2ba72d41..1e2f3e8f8f1a 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -70,7 +70,6 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then ARROW_JEMALLOC=OFF ARROW_MIMALLOC=OFF ARROW_S3=OFF - ARROW_S3_MODULE=OFF ARROW_WITH_OPENTELEMETRY=OFF fi @@ -230,7 +229,6 @@ else -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ - 
-DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index c372f9f19898..a8cabdf9bb0b 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -390,17 +390,11 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_JSON) define_option(ARROW_S3 - "Build Arrow with S3 support (requires the AWS SDK for C++)" + "Build Arrow S3 Module (requires the AWS SDK for C++)" OFF DEPENDS ARROW_FILESYSTEM) - define_option(ARROW_S3_MODULE - "Build the Arrow S3 filesystem as a dynamic module" - OFF - DEPENDS - ARROW_S3) - define_option(ARROW_SUBSTRAIT "Build the Arrow Substrait Consumer Module" OFF diff --git a/cpp/src/arrow/ArrowS3Config.cmake.in b/cpp/src/arrow/ArrowS3Config.cmake.in new file mode 100644 index 000000000000..7d95634420fa --- /dev/null +++ b/cpp/src/arrow/ArrowS3Config.cmake.in @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# This config sets the following variables in your project:: +# +# ArrowS3_FOUND - true if Arrow S3 found on the system +# +# This config sets the following targets in your project:: +# +# ArrowS3::arrow_s3_shared - for linked as shared library if shared library is built +# ArrowS3::arrow_s3_static - for linked as static library if static library is built + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(Arrow CONFIG) + +include("${CMAKE_CURRENT_LIST_DIR}/ArrowS3Targets.cmake") + +arrow_keep_backward_compatibility(ArrowS3 arrow_s3) + +check_required_components(ArrowS3) + +arrow_show_details(ArrowS3 ARROW_S3) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 8750598f6c3b..13855e5c5386 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -960,11 +960,6 @@ if(ARROW_FILESYSTEM) if(ARROW_HDFS) list(APPEND ARROW_FILESYSTEM_SRCS filesystem/hdfs.cc) endif() - if(ARROW_S3) - list(APPEND ARROW_FILESYSTEM_SRCS filesystem/s3fs.cc) - set_source_files_properties(filesystem/s3fs.cc PROPERTIES SKIP_UNITY_BUILD_INCLUSION - ON) - endif() arrow_add_object_library(ARROW_FILESYSTEM ${ARROW_FILESYSTEM_SRCS}) if(ARROW_AZURE) @@ -984,21 +979,38 @@ if(ARROW_FILESYSTEM) endforeach() endif() if(ARROW_S3) - foreach(ARROW_FILESYSTEM_TARGET ${ARROW_FILESYSTEM_TARGETS}) - target_link_libraries(${ARROW_FILESYSTEM_TARGET} PRIVATE ${AWSSDK_LINK_LIBRARIES}) + if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_S3_PC_CFLAGS "${ARROW_S3_PC_CFLAGS_PRIVATE}") + set(ARROW_S3_PC_CFLAGS_PRIVATE "") + endif() + list(APPEND ARROW_S3_LIB_SRCS filesystem/s3fs_module.cc filesystem/s3fs.cc) + add_arrow_lib(arrow_s3 + CMAKE_PACKAGE_NAME + ArrowS3 + PKG_CONFIG_NAME + arrow-s3 + SOURCES + ${ARROW_S3_LIB_SRCS} + SHARED_LINK_LIBS + arrow_shared + SHARED_PRIVATE_LINK_LIBS + ${AWSSDK_LINK_LIBRARIES} + SHARED_INSTALL_INTERFACE_LIBS + ${ARROW_S3_SHARED_INSTALL_INTERFACE_LIBS} + STATIC_LINK_LIBS + arrow_static + 
${AWSSDK_LINK_LIBRARIES} + OUTPUTS + ARROW_S3_LIBRARIES) + foreach(LIB_TARGET ${ARROW_S3_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_S3_EXPORTING) endforeach() - - if(ARROW_S3_MODULE) - if(NOT ARROW_BUILD_SHARED) - message(FATAL_ERROR "ARROW_S3_MODULE without shared libarrow (-DARROW_BUILD_SHARED=ON) is not supported" - ) - endif() - - add_library(arrow_s3fs MODULE filesystem/s3fs_module.cc filesystem/s3fs.cc) - target_link_libraries(arrow_s3fs PRIVATE ${AWSSDK_LINK_LIBRARIES} arrow_shared) - set_source_files_properties(filesystem/s3fs.cc filesystem/s3fs_module.cc - PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) + if(ARROW_BUILD_STATIC AND WIN32) + target_compile_definitions(arrow_s3_static PUBLIC ARROW_S3_STATIC) endif() + + set_source_files_properties(filesystem/s3fs.cc filesystem/s3fs_module.cc + PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) endif() list(APPEND ARROW_TESTING_SHARED_LINK_LIBS ${ARROW_GTEST_GMOCK}) diff --git a/cpp/src/arrow/arrow-s3.pc.in b/cpp/src/arrow/arrow-s3.pc.in new file mode 100644 index 000000000000..cab1894d2033 --- /dev/null +++ b/cpp/src/arrow/arrow-s3.pc.in @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +prefix=@CMAKE_INSTALL_PREFIX@ +includedir=@ARROW_PKG_CONFIG_INCLUDEDIR@ +libdir=@ARROW_PKG_CONFIG_LIBDIR@ + +Name: Apache Arrow S3 +Description: Apache Arrow's S3 filesystem implementation. +Version: @ARROW_VERSION@ +Requires: arrow +Libs: -L${libdir} -larrow_s3 +Cflags:@ARROW_S3_PC_CFLAGS@ +Cflags.private:@ARROW_S3_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index ee46f4d256ce..2827bee91ded 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -73,9 +73,9 @@ if(ARROW_S3) # static variables storage of AWS SDK for C++ in libaws*.a may be # mixed with one in libarrow. if(ARROW_TEST_LINKAGE STREQUAL "shared") - list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_shared) + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_s3_shared) else() - list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static) + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_s3_static) endif() list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) add_arrow_test(s3fs_test @@ -107,7 +107,7 @@ if(ARROW_S3) if(ARROW_BUILD_TESTS) add_executable(arrow-s3fs-narrative-test s3fs_narrative_test.cc) target_link_libraries(arrow-s3fs-narrative-test ${ARROW_TEST_LINK_LIBS} - ${GFLAGS_LIBRARIES}) + ${ARROW_S3_TEST_EXTRA_LINK_LIBS} ${GFLAGS_LIBRARIES}) add_dependencies(arrow-tests arrow-s3fs-narrative-test) endif() @@ -119,6 +119,7 @@ if(ARROW_S3) s3fs_benchmark.cc s3_test_util.cc STATIC_LINK_LIBS + ${ARROW_S3_TEST_EXTRA_LINK_LIBS} ${AWSSDK_LINK_LIBRARIES} ${ARROW_BENCHMARK_LINK_LIBS}) if(ARROW_TEST_LINKAGE STREQUAL "static") @@ -128,7 +129,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS) + if(ARROW_BUILD_TESTS) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc @@ -136,11 +137,11 @@ if(ARROW_S3) EXTRA_LABELS filesystem DEFINITIONS - ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3fs>" + ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3_shared>" EXTRA_LINK_LIBS Boost::filesystem)
target_compile_definitions(arrow-filesystem-test - PUBLIC ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3fs>") + PUBLIC ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3_shared>") target_sources(arrow-filesystem-test PUBLIC s3fs_module_test.cc s3_test_util.cc) target_link_libraries(arrow-filesystem-test PUBLIC Boost::filesystem) endif() diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index a2862d9c1f6f..b5563d870030 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -560,6 +560,7 @@ class ARROW_EXPORT SlowFileSystem : public FileSystem { /// will fail with an error. /// /// The user is responsible for synchronization of calls to this function. +ARROW_EXPORT void EnsureFinalized(); /// \defgroup filesystem-factories Functions for creating FileSystem instances diff --git a/cpp/src/arrow/filesystem/s3_visibility.h b/cpp/src/arrow/filesystem/s3_visibility.h new file mode 100644 index 000000000000..c67d29d9b7eb --- /dev/null +++ b/cpp/src/arrow/filesystem/s3_visibility.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +#pragma once + +#if defined(_WIN32) || defined(__CYGWIN__) +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4251) +# else +# pragma GCC diagnostic ignored "-Wattributes" +# endif + +# ifdef ARROW_S3_STATIC +# define ARROW_S3_EXPORT +# elif defined(ARROW_S3_EXPORTING) +# define ARROW_S3_EXPORT __declspec(dllexport) +# else +# define ARROW_S3_EXPORT __declspec(dllimport) +# endif + +# define ARROW_S3_NO_EXPORT + +# if defined(_MSC_VER) +# pragma warning(pop) +# endif + +#else // Not Windows +# ifndef ARROW_S3_EXPORT +# define ARROW_S3_EXPORT __attribute__((visibility("default"))) +# endif +# ifndef ARROW_S3_NO_EXPORT +# define ARROW_S3_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 47d249898be6..5086b3407c17 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -23,6 +23,7 @@ #include #include "arrow/filesystem/filesystem.h" +#include "arrow/filesystem/s3_visibility.h" #include "arrow/util/macros.h" #include "arrow/util/uri.h" @@ -38,7 +39,7 @@ class STSClient; namespace arrow::fs { /// Options for using a proxy for S3 -struct ARROW_EXPORT S3ProxyOptions { +struct ARROW_S3_EXPORT S3ProxyOptions { std::string scheme; std::string host; int port = -1; @@ -67,7 +68,7 @@ enum class S3CredentialsKind : int8_t { }; /// Pure virtual class for describing custom S3 retry strategies -class ARROW_EXPORT S3RetryStrategy { +class ARROW_S3_EXPORT S3RetryStrategy { public: virtual ~S3RetryStrategy() = default; @@ -96,7 +97,7 @@ class ARROW_EXPORT S3RetryStrategy { }; /// Options for the S3FileSystem implementation. 
-struct ARROW_EXPORT S3Options { +struct ARROW_S3_EXPORT S3Options { /// \brief Smart defaults for option values /// /// The possible values for this setting are explained in the AWS docs: @@ -315,7 +316,7 @@ struct ARROW_EXPORT S3Options { /// Some implementation notes: /// - buckets are special and the operations available on them may be limited /// or more expensive than desired. -class ARROW_EXPORT S3FileSystem : public FileSystem { +class ARROW_S3_EXPORT S3FileSystem : public FileSystem { public: ~S3FileSystem() override; @@ -409,7 +410,7 @@ class ARROW_EXPORT S3FileSystem : public FileSystem { enum class S3LogLevel : int8_t { Off, Fatal, Error, Warn, Info, Debug, Trace }; -struct ARROW_EXPORT S3GlobalOptions { +struct ARROW_S3_EXPORT S3GlobalOptions { /// The log level for S3-originating messages. S3LogLevel log_level; @@ -444,21 +445,21 @@ struct ARROW_EXPORT S3GlobalOptions { /// /// Once this function is called you MUST call FinalizeS3 before the end of the /// application in order to avoid a segmentation fault at shutdown. -ARROW_EXPORT +ARROW_S3_EXPORT Status InitializeS3(const S3GlobalOptions& options); /// \brief Ensure the S3 APIs are initialized, but only if not already done. /// /// If necessary, this will call InitializeS3() with some default options. -ARROW_EXPORT +ARROW_S3_EXPORT Status EnsureS3Initialized(); /// Whether S3 was initialized, and not finalized. -ARROW_EXPORT +ARROW_S3_EXPORT bool IsS3Initialized(); /// Whether S3 was finalized. -ARROW_EXPORT +ARROW_S3_EXPORT bool IsS3Finalized(); /// \brief Shutdown the S3 APIs. @@ -470,16 +471,16 @@ bool IsS3Finalized(); /// Calls to InitializeS3() and FinalizeS3() should be serialized by the /// application (this also applies to EnsureS3Initialized() and /// EnsureS3Finalized()). -ARROW_EXPORT +ARROW_S3_EXPORT Status FinalizeS3(); /// \brief Ensure the S3 APIs are shutdown, but only if not already done. /// /// If necessary, this will call FinalizeS3(). 
-ARROW_EXPORT +ARROW_S3_EXPORT Status EnsureS3Finalized(); -ARROW_EXPORT +ARROW_S3_EXPORT Result ResolveS3BucketRegion(const std::string& bucket); } // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index f07b07fb6c6f..cc6a1b13bfb7 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -68,12 +68,7 @@ class S3ModuleTest : public ::testing::Test { class RegistrationTestEnvironment : public ::testing::Environment { public: - void SetUp() override { - // Unregister the s3 filesystem factory so that we can be sure the module loading and - // the factories from the module are actually working - ASSERT_OK(internal::UnregisterFileSystemFactory("s3")); - ASSERT_OK(LoadFileSystemFactories(ARROW_S3_LIBPATH)); - } + void SetUp() override { ASSERT_OK(LoadFileSystemFactories(ARROW_S3_LIBPATH)); } void TearDown() override { EnsureFinalized(); } }; diff --git a/cpp/src/arrow/filesystem/util_internal.h b/cpp/src/arrow/filesystem/util_internal.h index 220640b657bb..40f402f534cf 100644 --- a/cpp/src/arrow/filesystem/util_internal.h +++ b/cpp/src/arrow/filesystem/util_internal.h @@ -83,6 +83,7 @@ enum class AuthorityHandlingBehavior { /// \param supported_schemes the set of URI schemes that should be accepted /// \param accept_local_paths if true, allow an absolute path /// \return the path portion of the URI +ARROW_EXPORT Result PathFromUriHelper(const std::string& uri_string, std::vector supported_schemes, bool accept_local_paths, @@ -95,6 +96,7 @@ ARROW_EXPORT Result GlobFiles(const std::shared_ptr& filesystem, const std::string& glob); +ARROW_EXPORT extern FileSystemGlobalOptions global_options; /// \brief Unregister filesystem factories diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d0ddb9009f89..ee57bed130ac 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -742,6 +742,14 @@ if(PYARROW_BUILD_S3) if(NOT ARROW_S3) 
message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON") endif() + find_package(ArrowS3 REQUIRED) + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_S3_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_S3_IMPORT_LIB}) + endif() + endif() + set(S3_LINK_LIBS ArrowS3::arrow_s3_shared) list(APPEND CYTHON_EXTENSIONS _s3fs) endif() @@ -1038,6 +1046,10 @@ if(PYARROW_BUILD_PARQUET) endif() endif() +if(PYARROW_BUILD_S3) + target_link_libraries(_s3fs PRIVATE ${S3_LINK_LIBS}) +endif() + # # Type stubs with docstring injection # From bf162b6dbbc264296bfd437fcc17f70530fffe3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 18 Jun 2026 15:10:06 +0200 Subject: [PATCH 2/4] arrow-filesystem-test shouldn't require s3fs_module_test.cc as it is covered on s3fs_module_test and s3fs_module_test doesn't make sense on static builds --- cpp/src/arrow/filesystem/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index 2827bee91ded..661132118582 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -129,7 +129,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_BUILD_TESTS) + if(ARROW_BUILD_TESTS AND ARROW_BUILD_SHARED) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc @@ -140,10 +140,6 @@ if(ARROW_S3) ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3_shared>" EXTRA_LINK_LIBS Boost::filesystem) - target_compile_definitions(arrow-filesystem-test - PUBLIC ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3_shared>") - target_sources(arrow-filesystem-test PUBLIC s3fs_module_test.cc s3_test_util.cc) - target_link_libraries(arrow-filesystem-test PUBLIC Boost::filesystem) endif() endif() From f7d7f5840234cad822b4bec0c6cb7e238aad171d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Jun 2026 10:54:19 +0200 Subject: [PATCH 3/4] Add the new S3 library to R configure and configure.win and to C GLib and Ruby
builds --- c_glib/arrow-glib/meson.build | 3 +++ c_glib/meson.build | 12 ++++++++++++ r/configure | 4 ++++ r/configure.win | 12 +++++++++--- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index c53aee72403a..defe1b21cd3f 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -225,6 +225,9 @@ if not gio.found() gio = dependency('gio-2.0') endif dependencies = [arrow_acero, arrow_compute, arrow, gobject, gio] +if arrow_s3.found() + dependencies += arrow_s3 +endif libarrow_glib = library( 'arrow-glib', sources: sources + enums, diff --git a/c_glib/meson.build b/c_glib/meson.build index 6cd615312f50..d8c4c9cbdd78 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -165,6 +165,13 @@ if arrow_cpp_build_lib_dir == '' modules: ['ArrowDataset::arrow_dataset_shared'], required: false, ) + arrow_s3 = dependency( + 'arrow-s3', + 'ArrowS3', + kwargs: common_args, + modules: ['ArrowS3::arrow_s3_shared'], + required: false, + ) arrow_flight = dependency( 'arrow-flight', 'ArrowFlight', @@ -235,6 +242,11 @@ main(void) dirs: [arrow_cpp_build_lib_dir], required: false, ) + arrow_s3 = cpp_compiler.find_library( + 'arrow_s3', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) arrow_flight = cpp_compiler.find_library( 'arrow_flight', dirs: [arrow_cpp_build_lib_dir], diff --git a/r/configure b/r/configure index 9e92eb6b47f2..9eab6ee0144e 100755 --- a/r/configure +++ b/r/configure @@ -358,6 +358,10 @@ add_feature_flags () { fi if arrow_built_with ARROW_S3; then PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_S3" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-s3" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_s3 $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_s3 is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS fi if arrow_built_with ARROW_GCS; then PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES 
-DARROW_R_WITH_GCS" diff --git a/r/configure.win b/r/configure.win index 16c5ec1bee8d..dd06bb69bcf8 100755 --- a/r/configure.win +++ b/r/configure.win @@ -82,12 +82,13 @@ function configure_binaries() { # NOTE: If you make changes to the libraries below, you should also change # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DARROW_COMPUTE_STATIC -DPARQUET_STATIC \ - -DARROW_DS_STATIC -DARROW_ACERO_STATIC -DARROW_R_WITH_PARQUET \ + -DARROW_DS_STATIC -DARROW_ACERO_STATIC -DARROW_R_WITH_PARQUET -DARROW_S3_STATIC \ -DARROW_R_WITH_COMPUTE -DARROW_R_WITH_ACERO \ - -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON" + -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON \ + -DARROW_R_WITH_S3" PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) ' PKG_LIBS="$PKG_LIBS -L${RWINLIB}/lib"'$(R_ARCH)$(CRT) ' - PKG_LIBS="$PKG_LIBS -larrow_dataset -larrow_acero -lparquet -larrow_compute -larrow -larrow_bundled_dependencies \ + PKG_LIBS="$PKG_LIBS -larrow_dataset -larrow_acero -lparquet -larrow_compute -larrow_s3 -larrow -larrow_bundled_dependencies \ -lutf8proc -lsnappy -lz -lzstd -llz4 -lbz2 ${BROTLI_LIBS} -lole32 \ ${MIMALLOC_LIBS} ${OPENSSL_LIBS}" @@ -186,6 +187,10 @@ add_feature_flags () { fi if arrow_built_with ARROW_S3; then PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_S3" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-s3" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_s3 $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_s3 is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS fi if arrow_built_with ARROW_GCS; then PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_GCS" @@ -286,6 +291,7 @@ function configure_dev() { if [ $(cmake_option ARROW_S3) -eq 1 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_S3" + PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES arrow-s3" fi if [ $(cmake_option ARROW_GCS) -eq 1 ]; then From 2f065c9d2744f3bb4fa98b0c7d7fa66eed9fbb37 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Jun 2026 15:17:44 +0200 Subject: [PATCH 4/4] Attempt to fix debian packages --- .../apache-arrow/debian/control.in | 25 +++++++++++++++++++ .../debian/libarrow-s3-2500.install | 1 + .../debian/libarrow-s3-dev.install | 4 +++ 3 files changed, 30 insertions(+) create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-2500.install create mode 100644 dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-dev.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 0a82fb04ebcb..f72c55afce47 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -119,6 +119,19 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Dataset module. +Package: libarrow-s3-2500 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow2500 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for S3 filesystem module. + Package: libarrow-flight2500 Section: libs Architecture: any @@ -224,6 +237,18 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for dataset module. +Package: libarrow-s3-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-s3-2500 (= ${binary:Version}), + libarrow-dev (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for S3 filesystem module.
+ Package: libarrow-flight-dev Section: libdevel Architecture: any diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-2500.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-2500.install new file mode 100644 index 000000000000..c979daae8bc6 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-2500.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_s3.so.* diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-dev.install new file mode 100644 index 000000000000..61a6f70717f8 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-s3-dev.install @@ -0,0 +1,4 @@ +usr/lib/*/cmake/ArrowS3/ +usr/lib/*/libarrow_s3.a +usr/lib/*/libarrow_s3.so +usr/lib/*/pkgconfig/arrow-s3.pc