From b552578ebac63dd8db47198472dc617dd1c02627 Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:27:23 -0600 Subject: [PATCH 01/18] Refactor build workflow for Python package Refactor GitHub Actions workflow for building and publishing Python package. Removed unnecessary steps and streamlined Python setup with 'uv'. --- ...uild-lib_array_morph-and-pypi-package.yaml | 86 +++++-------------- 1 file changed, 21 insertions(+), 65 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 2e86554..02ec194 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -11,7 +11,6 @@ jobs: build_wheels: name: Build (${{ matrix.os }} / ${{ matrix.arch }} / py${{ matrix.python }}) runs-on: ${{ matrix.runner }} - container: ${{ matrix.container || '' }} strategy: fail-fast: false matrix: @@ -44,34 +43,21 @@ jobs: with: fetch-depth: 0 # setuptools-scm needs full history - # ────────────────────────────────────────────── - # 1. 
Python + system deps - # ────────────────────────────────────────────── + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + version: "0.10.6" - # manylinux containers have Python pre-installed at /opt/python/ - - name: Select Python (Linux container) - if: runner.os == 'Linux' - run: | - # Map matrix python version to manylinux cpython path - PY_VER="${{ matrix.python }}" - PY_TAG="cp${PY_VER/./}" # "3.12" → "cp312" - - # Find the matching Python in /opt/python/ - PY_DIR=$(ls -d /opt/python/${PY_TAG}-*/bin | head -1) - if [ -z "$PY_DIR" ]; then - echo "ERROR: Python $PY_VER not found in manylinux container" - ls /opt/python/ - exit 1 - fi + - name: Set up Python + run: uv python install ${{ matrix.python }} - echo "$PY_DIR" >> $GITHUB_PATH - echo "Using Python from: $PY_DIR" - $PY_DIR/python --version + # ────────────────────────────────────────────── + # 1. System deps + # ────────────────────────────────────────────── - name: Install system deps (Linux container) if: runner.os == 'Linux' run: | - # manylinux_2_28 is AlmaLinux 8 — uses yum yum install -y \ cmake ninja-build \ pkgconfig \ @@ -84,45 +70,19 @@ jobs: mesa-libGL-devel \ alsa-lib-devel \ uuid-devel \ - perl-IPC-Cmd # needed by some Conan builds (e.g. 
OpenSSL) - - - name: Install uv (macOS) - if: runner.os == 'macOS' - uses: astral-sh/setup-uv@v7 - with: - version: "0.10.6" - - - name: Install uv (Linux container) - if: runner.os == 'Linux' - run: | - curl -LsSf https://astral.sh/uv/0.10.6/install.sh | sh - echo "$HOME/.local/bin" >> $GITHUB_PATH - - - name: Set up Python (macOS) - if: runner.os == 'macOS' - run: uv python install ${{ matrix.python }} + perl-IPC-Cmd - name: Install system deps (macOS) if: runner.os == 'macOS' run: brew install ninja cmake - - name: Create venv + install Python tools + - name: Install Python tools run: | - if [ "${{ runner.os }}" = "macOS" ]; then - uv venv --python ${{ matrix.python }} - else - # Use the manylinux container's Python - python -m venv ${{ github.workspace }}/.venv - fi + uv venv --python ${{ matrix.python }} echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH - - # Upgrade pip inside venv for manylinux compatibility - ${{ github.workspace }}/.venv/bin/python -m pip install --upgrade pip - - # Install build tools - ${{ github.workspace }}/.venv/bin/pip install \ + uv pip install \ scikit-build-core setuptools-scm h5py \ - build auditwheel delocate conan + build auditwheel delocate # ────────────────────────────────────────────── # 2. Conan: install C++ deps (cached per platform) @@ -138,6 +98,7 @@ jobs: - name: Conan install working-directory: lib run: | + uv tool install conan conan profile detect --force conan install . --build=missing -of build \ -c tools.system.package_manager:mode=install \ @@ -171,14 +132,9 @@ jobs: echo "Discovered h5py HDF5 at: $H5PY_HDF5_DIR" ls -la "$H5PY_HDF5_DIR" - - name: Set macOS deployment target - if: runner.os == 'macOS' - run: echo "MACOSX_DEPLOYMENT_TARGET=12.0" >> $GITHUB_ENV - - name: Build wheel run: | - python -m build --wheel --no-isolation - + uv build --wheel --no-build-isolation --python ${{ github.workspace }}/.venv/bin/python # ────────────────────────────────────────────── # 4. 
Repair wheel for PyPI # ────────────────────────────────────────────── @@ -209,7 +165,7 @@ jobs: - name: Smoke test run: | - pip install wheelhouse/*.whl --force-reinstall + uv pip install wheelhouse/*.whl --force-reinstall python3 -c "import arraymorph; print('arraymorph imported successfully')" - name: Upload wheel artifact @@ -230,7 +186,7 @@ jobs: ext="${lib_file##*.}" cp "$lib_file" "lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext" echo "LIB_ARTIFACT=lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext" >> $GITHUB_ENV - + - name: Fix HDF5 paths in standalone binary (macOS) if: runner.os == 'macOS' && github.event_name == 'release' && matrix.python == '3.12' run: | @@ -240,7 +196,7 @@ jobs: install_name_tool -change "$HDF5_REF" "@rpath/$HDF5_FILENAME" "$LIB" echo "Fixed: $HDF5_REF → @rpath/$HDF5_FILENAME" otool -L "$LIB" | grep hdf5 - + - name: Fix HDF5 paths in standalone binary (Linux) if: runner.os == 'Linux' && github.event_name == 'release' && matrix.python == '3.12' run: | @@ -251,7 +207,7 @@ jobs: "$LIB" echo "Fixed HDF5 dependency" ldd "$LIB" | grep hdf5 || patchelf --print-needed "$LIB" | grep hdf5 - + - name: Attach native library to GitHub release if: github.event_name == 'release' && matrix.python == '3.12' uses: softprops/action-gh-release@v2 @@ -353,7 +309,7 @@ jobs: publish: name: Publish to PyPI - needs: [test_testpypi] + needs: [test_testpypi] # ← now waits for TestPyPI to pass runs-on: ubuntu-latest if: github.event_name == 'release' environment: From 6e781293d2bb402ff9ceca2cacaf02b31e0c803e Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:34:52 -0600 Subject: [PATCH 02/18] Add container specification to build job --- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 
02ec194..f7893dc 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -11,6 +11,7 @@ jobs: build_wheels: name: Build (${{ matrix.os }} / ${{ matrix.arch }} / py${{ matrix.python }}) runs-on: ${{ matrix.runner }} + container: ${{ matrix.container || '' }} strategy: fail-fast: false matrix: From afc14da12991074ec545cacd5b1fe4f1cd3bbf2d Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:38:34 -0600 Subject: [PATCH 03/18] Modify Conan installation command for OS-specific handling --- .../build-lib_array_morph-and-pypi-package.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index f7893dc..cb725e2 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -101,9 +101,14 @@ jobs: run: | uv tool install conan conan profile detect --force - conan install . --build=missing -of build \ - -c tools.system.package_manager:mode=install \ - -c tools.system.package_manager:sudo=True + if [ "${{ matrix.os }}" = "macos" ]; then + conan install . --build=missing -of build \ + -c tools.system.package_manager:mode=install \ + -c tools.system.package_manager:sudo=True + else + conan install . --build=missing -of build \ + -c tools.system.package_manager:mode=check + fi - name: Find Conan toolchain run: | From 93b47a46d05f05aff899a4dc533d9d0e1c5cde9c Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:42:22 -0600 Subject: [PATCH 04/18] Update system dependencies in build workflow Added additional development libraries for building. 
--- .../workflows/build-lib_array_morph-and-pypi-package.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index cb725e2..0a52663 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -71,7 +71,12 @@ jobs: mesa-libGL-devel \ alsa-lib-devel \ uuid-devel \ - perl-IPC-Cmd + perl-IPC-Cmd \ + libfontenc-devel libXaw-devel libxkbfile-devel \ + libXres-devel libXxf86vm-devel libXv-devel \ + xcb-util-wm-devel xcb-util-image-devel \ + xcb-util-keysyms-devel xcb-util-renderutil-devel \ + xcb-util-cursor-devel libuuid-devel - name: Install system deps (macOS) if: runner.os == 'macOS' From e87851081da411e9847bc7e11ccdee06170dd20d Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 15:48:42 -0600 Subject: [PATCH 05/18] Add perl-Time-Piece dependency to workflow --- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 0a52663..e0dac7a 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -72,6 +72,7 @@ jobs: alsa-lib-devel \ uuid-devel \ perl-IPC-Cmd \ + perl-Time-Piece \ libfontenc-devel libXaw-devel libxkbfile-devel \ libXres-devel libXxf86vm-devel libXv-devel \ xcb-util-wm-devel xcb-util-image-devel \ From ea659770e215dac4e4ffa1ba3530110dc58bfb52 Mon Sep 17 00:00:00 2001 From: Carlos Guzman Date: Thu, 26 Feb 2026 19:24:42 -0600 Subject: [PATCH 06/18] refactor(ci): migrate build workflow from Conan to vcpkg Replace Conan package manager with vcpkg for C++ dependency management in the GitHub Actions 
build workflow. Changes: - Remove Conan install, profile detect, and toolchain discovery steps - Remove 40+ X11/ALSA apt-get packages required by Conan's transitive deps - Add vcpkg bootstrap and binary cache steps - Linux system deps reduced to: cmake, ninja-build (yum in manylinux) - Retain manylinux_2_28 containers for both x86_64 and aarch64 - vcpkg cache keyed on lib/vcpkg.json per OS/arch Motivation: Conan pulled transitive xorg/system and Perl dependencies for a headless cloud storage library, causing build failures in manylinux containers. vcpkg builds only declared dependencies with native CMake toolchain integration. --- ...uild-lib_array_morph-and-pypi-package.yaml | 85 ++++++------------- .gitignore | 10 +++ justfile | 21 ++--- lib/CMakeLists.txt | 27 +++--- lib/conanfile.py | 41 --------- lib/justfile | 19 +++-- lib/vcpkg.json | 14 +++ 7 files changed, 83 insertions(+), 134 deletions(-) delete mode 100644 lib/conanfile.py create mode 100644 lib/vcpkg.json diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index e0dac7a..73793da 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -1,4 +1,4 @@ -name: Build, Test, and Publish +nname: Build, Test, and Publish on: pull_request: @@ -16,21 +16,21 @@ jobs: fail-fast: false matrix: include: - # ── Linux x86_64 (manylinux_2_28 container) ── + # Linux x86_64 (manylinux_2_28 container) - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.9" } - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.10" } - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.11" } - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", 
python: "3.12" } - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.13" } - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.14" } - # ── Linux aarch64 (manylinux_2_28 container) ── + # Linux aarch64 (manylinux_2_28 container) - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.9" } - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.10" } - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.11" } - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.12" } - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.13" } - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.14" } - # ── macOS arm64 ── + # macOS arm64 - { os: macos, arch: arm64, runner: macos-latest, python: "3.9" } - { os: macos, arch: arm64, runner: macos-latest, python: "3.10" } - { os: macos, arch: arm64, runner: macos-latest, python: "3.11" } @@ -42,7 +42,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 with: - fetch-depth: 0 # setuptools-scm needs full history + fetch-depth: 0 - name: Install uv uses: astral-sh/setup-uv@v7 @@ -58,26 +58,7 @@ jobs: - name: Install system deps (Linux container) if: runner.os == 'Linux' - run: | - yum install -y \ - cmake ninja-build \ - pkgconfig \ - libX11-devel libXext-devel libXrender-devel \ - libXrandr-devel libXinerama-devel libXcursor-devel \ - libXcomposite-devel libXdamage-devel libXfixes-devel \ - libXi-devel libXtst-devel libXScrnSaver-devel \ - libxcb-devel xcb-util-devel \ - libXau-devel libXdmcp-devel \ - mesa-libGL-devel \ - alsa-lib-devel \ - 
uuid-devel \ - perl-IPC-Cmd \ - perl-Time-Piece \ - libfontenc-devel libXaw-devel libxkbfile-devel \ - libXres-devel libXxf86vm-devel libXv-devel \ - xcb-util-wm-devel xcb-util-image-devel \ - xcb-util-keysyms-devel xcb-util-renderutil-devel \ - xcb-util-cursor-devel libuuid-devel + run: yum install -y cmake ninja-build - name: Install system deps (macOS) if: runner.os == 'macOS' @@ -92,40 +73,22 @@ jobs: build auditwheel delocate # ────────────────────────────────────────────── - # 2. Conan: install C++ deps (cached per platform) + # 2. vcpkg: install C++ deps (cached per platform) # ────────────────────────────────────────────── - - name: Cache Conan packages + - name: Bootstrap vcpkg + run: | + git clone https://github.com/microsoft/vcpkg.git ${{ github.workspace }}/vcpkg + ${{ github.workspace }}/vcpkg/bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${{ github.workspace }}/vcpkg" >> $GITHUB_ENV + echo "CMAKE_TOOLCHAIN_FILE=${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV + + - name: Cache vcpkg packages uses: actions/cache@v4 with: - path: ~/.conan2 - key: conan-${{ matrix.os }}-${{ matrix.arch }}-${{ hashFiles('lib/conanfile.py') }} - restore-keys: conan-${{ matrix.os }}-${{ matrix.arch }}- - - - name: Conan install - working-directory: lib - run: | - uv tool install conan - conan profile detect --force - if [ "${{ matrix.os }}" = "macos" ]; then - conan install . --build=missing -of build \ - -c tools.system.package_manager:mode=install \ - -c tools.system.package_manager:sudo=True - else - conan install . 
--build=missing -of build \ - -c tools.system.package_manager:mode=check - fi - - - name: Find Conan toolchain - run: | - TOOLCHAIN=$(find ${{ github.workspace }}/lib/build -name "conan_toolchain.cmake" | head -1) - if [ -z "$TOOLCHAIN" ]; then - echo "ERROR: conan_toolchain.cmake not found" - find ${{ github.workspace }}/lib/build -type f -name "*.cmake" || true - exit 1 - fi - echo "CMAKE_TOOLCHAIN_FILE=$TOOLCHAIN" >> $GITHUB_ENV - echo "Found toolchain at: $TOOLCHAIN" + path: ~/.cache/vcpkg/archives + key: vcpkg-${{ matrix.os }}-${{ matrix.arch }}-${{ hashFiles('lib/vcpkg.json') }} + restore-keys: vcpkg-${{ matrix.os }}-${{ matrix.arch }}- # ────────────────────────────────────────────── # 3. Discover h5py HDF5 + build wheel @@ -145,8 +108,8 @@ jobs: ls -la "$H5PY_HDF5_DIR" - name: Build wheel - run: | - uv build --wheel --no-build-isolation --python ${{ github.workspace }}/.venv/bin/python + run: uv build --wheel --no-build-isolation --python ${{ github.workspace }}/.venv/bin/python + # ────────────────────────────────────────────── # 4. 
Repair wheel for PyPI # ────────────────────────────────────────────── @@ -198,7 +161,7 @@ jobs: ext="${lib_file##*.}" cp "$lib_file" "lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext" echo "LIB_ARTIFACT=lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext" >> $GITHUB_ENV - + - name: Fix HDF5 paths in standalone binary (macOS) if: runner.os == 'macOS' && github.event_name == 'release' && matrix.python == '3.12' run: | @@ -208,7 +171,7 @@ jobs: install_name_tool -change "$HDF5_REF" "@rpath/$HDF5_FILENAME" "$LIB" echo "Fixed: $HDF5_REF → @rpath/$HDF5_FILENAME" otool -L "$LIB" | grep hdf5 - + - name: Fix HDF5 paths in standalone binary (Linux) if: runner.os == 'Linux' && github.event_name == 'release' && matrix.python == '3.12' run: | @@ -219,7 +182,7 @@ jobs: "$LIB" echo "Fixed HDF5 dependency" ldd "$LIB" | grep hdf5 || patchelf --print-needed "$LIB" | grep hdf5 - + - name: Attach native library to GitHub release if: github.event_name == 'release' && matrix.python == '3.12' uses: softprops/action-gh-release@v2 @@ -321,7 +284,7 @@ jobs: publish: name: Publish to PyPI - needs: [test_testpypi] # ← now waits for TestPyPI to pass + needs: [test_testpypi] runs-on: ubuntu-latest if: github.event_name == 'release' environment: diff --git a/.gitignore b/.gitignore index e739efe..2c0814a 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,13 @@ uv.lock *.flv *.mov *.wmv + +# vcpkg +lib/vcpkg_installed/ +vcpkg_installed/ + +# Conan generated (legacy) +lib/activate.sh +lib/deactivate.sh +.conan2/ +conan.conf diff --git a/justfile b/justfile index 1b1889b..befe9db 100644 --- a/justfile +++ b/justfile @@ -1,4 +1,3 @@ - # ArrayMorph — Top-Level Build Orchestration # https://just.systems @@ -7,9 +6,8 @@ set dotenv-load := true set export := true # --- Variables --- -CONAN_BUILD_DIR := "lib/build/Release/generators" -CMAKE_TOOLCHAIN_FILE := justfile_directory() / CONAN_BUILD_DIR / "conan_toolchain.cmake" -H5PY_HDF5_DIR := `./.venv/bin/python -c "import 
h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` +VCPKG_TOOLCHAIN := env("VCPKG_ROOT", home_directory() / ".vcpkg") / "scripts/buildsystems/vcpkg.cmake" +H5PY_HDF5_DIR := `./.venv/bin/python3 -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` # --- Recipes --- @@ -17,19 +15,19 @@ H5PY_HDF5_DIR := `./.venv/bin/python -c "import h5py,os;d=os.path.dirname(h5py._ default: @just --list -# Install C++ dependencies via Conan +# Install C++ dependencies via vcpkg deps: - cd lib && conan install . --build=missing -s build_type=Release + cd lib && vcpkg install # Build Python wheel (scikit-build-core handles CMake) wheel: - CMAKE_TOOLCHAIN_FILE={{ CMAKE_TOOLCHAIN_FILE }} \ + CMAKE_TOOLCHAIN_FILE={{ VCPKG_TOOLCHAIN }} \ H5PY_HDF5_DIR={{ H5PY_HDF5_DIR }} \ uv build --wheel --no-build-isolation # Install editable into current venv (for development iteration) dev: - CMAKE_TOOLCHAIN_FILE={{ CMAKE_TOOLCHAIN_FILE }} \ + CMAKE_TOOLCHAIN_FILE={{ VCPKG_TOOLCHAIN }} \ H5PY_HDF5_DIR={{ H5PY_HDF5_DIR }} \ uv pip install -e . 
@@ -42,7 +40,7 @@ test: uv venv .test-venv source .test-venv/bin/activate.fish uv pip install dist/arraymorph-0.2.0-*.whl - python -c "import arraymorph; print('Plugin:', arraymorph.get_plugin_path()); arraymorph.enable(); print('VOL enabled')" + python3 -c "import arraymorph; print('Plugin:', arraymorph.get_plugin_path()); arraymorph.enable(); print('VOL enabled')" rm -rf .test-venv # Full build + test @@ -50,14 +48,13 @@ all: build test # Clean build artifacts clean: - rm -rf lib/build dist *.egg-info .test-venv + rm -rf lib/build lib/vcpkg_installed dist *.egg-info .test-venv # Full clean rebuild rebuild: clean build # Show current env var values (for debugging) info: - @echo "CMAKE_TOOLCHAIN_FILE: {{ CMAKE_TOOLCHAIN_FILE }}" + @echo "CMAKE_TOOLCHAIN_FILE: {{ VCPKG_TOOLCHAIN }}" @echo "H5PY_HDF5_DIR: {{ H5PY_HDF5_DIR }}" @echo "Plugin lib: $(find lib/build -name 'lib_array_morph*' 2>/dev/null || echo 'not built')" - diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 897085b..86e5a60 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(ArrayMorph - VERSION 0.1.0 + VERSION 0.2.0 LANGUAGES C CXX ) @@ -28,23 +28,28 @@ include_directories(${CMAKE_SOURCE_DIR}/include) find_package(AWSSDK REQUIRED COMPONENTS core s3) # Azure SDK -find_package(AzureSDK CONFIG REQUIRED) +find_package(azure-storage-blobs-cpp CONFIG REQUIRED) # cURL and OpenSSL find_package(CURL REQUIRED) find_package(OpenSSL REQUIRED) -# --- HDF5: Conan headers + h5py runtime binary --- +# --- HDF5: vcpkg headers + h5py runtime binary --- # # ArrayMorph is a VOL plugin that gets dlopen'd by HDF5 at runtime. # We MUST link against the same HDF5 that h5py ships to avoid -# duplicate symbol conflicts. Conan provides headers, h5py provides +# duplicate symbol conflicts. vcpkg provides headers, h5py provides # the shared library. 
-if(NOT DEFINED ENV{H5PY_HDF5_DIR}) +# Accept H5PY_HDF5_DIR from either CMake variable (-D) or env var +if(NOT H5PY_HDF5_DIR AND DEFINED ENV{H5PY_HDF5_DIR}) + set(H5PY_HDF5_DIR "$ENV{H5PY_HDF5_DIR}") +endif() + +if(NOT H5PY_HDF5_DIR) message(FATAL_ERROR "H5PY_HDF5_DIR not set. Run:\n" - " export H5PY_HDF5_DIR=$(python -c \"" + " export H5PY_HDF5_DIR=$(python3 -c \"" "import h5py, os; " "d=os.path.dirname(h5py.__file__); " "print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) " @@ -52,9 +57,9 @@ if(NOT DEFINED ENV{H5PY_HDF5_DIR}) ) endif() -set(H5PY_LIB_DIR "$ENV{H5PY_HDF5_DIR}") +set(H5PY_LIB_DIR "${H5PY_HDF5_DIR}") -# Find HDF5 headers (via Conan) +# Find HDF5 headers (via vcpkg) find_package(HDF5 REQUIRED COMPONENTS C) # Locate the actual shared library in h5py's bundled directory @@ -76,7 +81,7 @@ endif() # Pick the first match list(GET _h5py_hdf5_libs 0 _h5py_hdf5_lib) -# Create imported target: Conan headers + h5py binary +# Create imported target: vcpkg headers + h5py binary add_library(hdf5_custom SHARED IMPORTED) set_target_properties(hdf5_custom PROPERTIES IMPORTED_LOCATION "${_h5py_hdf5_lib}" @@ -89,8 +94,8 @@ message(STATUS "HDF5 binary: ${_h5py_hdf5_lib}") # Collect all dependencies into a list set(ALL_DEPS - AWS::aws-sdk-cpp-core - AWS::aws-sdk-cpp-s3 + aws-cpp-sdk-core + aws-cpp-sdk-s3 Azure::azure-storage-blobs hdf5_custom OpenSSL::SSL diff --git a/lib/conanfile.py b/lib/conanfile.py deleted file mode 100644 index be860ad..0000000 --- a/lib/conanfile.py +++ /dev/null @@ -1,41 +0,0 @@ -from conan import ConanFile -from conan.tools.cmake import cmake_layout, CMakeToolchain, CMakeDeps - - -class ArrayMorphRecipe(ConanFile): - name = "ArrayMorph" - version = "0.2.0" - settings = "os", "compiler", "build_type", "arch" - - def requirements(self): - self.requires("aws-sdk-cpp/1.11.692") - self.requires("azure-sdk-for-cpp/1.16.1") - self.requires("hdf5/1.14.6") - self.requires("libcurl/8.17.0") - self.requires("openssl/3.6.1") - - def 
configure(self): - self.options["*"].shared = False - - # AWS SDK: ONLY S3 — disable everything that pulls in - # audio (libalsa), GUI (xorg), and other unnecessary deps - self.options["aws-sdk-cpp"].s3 = True - self.options["aws-sdk-cpp"].text_to_speech = False - self.options["aws-sdk-cpp"].access_management = False - self.options["aws-sdk-cpp"].identity_management = False - self.options["aws-sdk-cpp"].transfer = False - self.options["aws-sdk-cpp"].queues = False - self.options["aws-sdk-cpp"].messaging = False - - # Azure SDK: only blob storage - self.options["azure-sdk-for-cpp"].with_storage_blobs = True - self.options["azure-sdk-for-cpp"].with_storage_datalake = False - - def layout(self): - cmake_layout(self) - - def generate(self): - tc = CMakeToolchain(self, generator="Ninja") - tc.generate() - deps = CMakeDeps(self) - deps.generate() diff --git a/lib/justfile b/lib/justfile index 5349ac8..6ec73a1 100644 --- a/lib/justfile +++ b/lib/justfile @@ -2,11 +2,11 @@ # Variables BUILD_DIR := "build" -COMPILER_STD := "gnu20" +VCPKG_TOOLCHAIN := env("VCPKG_ROOT", home_directory() / ".vcpkg") / "scripts/buildsystems/vcpkg.cmake" # Set this to the path where H5Py installs HDF5 binary. On macOS its .dylib, Linux .so, Windows .dll -H5PY_HDF5_DIR := `python -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` +H5PY_HDF5_DIR := `../.venv/bin/python -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` # Settings set dotenv-load := true @@ -16,25 +16,26 @@ set export := true default: @just --list -# 1. Install dependencies via Conan - -# We put all .pc and .ini files in a hidden .conan_deps folder +# 1. Install dependencies via vcpkg deps: - conan install . 
--build=missing -s compiler.cppstd={{ COMPILER_STD }} + vcpkg install # 2. Setup the CMake build environment setup: export H5PY_HDF5_DIR={{ H5PY_HDF5_DIR }}; \ - cmake --preset conan-release + cmake -B {{ BUILD_DIR }} -S . \ + -DCMAKE_TOOLCHAIN_FILE={{ VCPKG_TOOLCHAIN }} \ + -DCMAKE_BUILD_TYPE=Release \ + -G Ninja build: - cmake --build --preset conan-release + cmake --build {{ BUILD_DIR }} # 5. Full build from scratch full-build: deps setup build # 6. Clean all build and dependency artifacts clean: - rm -rf {{ BUILD_DIR }} + rm -rf {{ BUILD_DIR }} vcpkg_installed full-clean-build: clean full-build diff --git a/lib/vcpkg.json b/lib/vcpkg.json new file mode 100644 index 0000000..f1685ab --- /dev/null +++ b/lib/vcpkg.json @@ -0,0 +1,14 @@ +{ + "name": "arraymorph", + "version-string": "0.2.0", + "dependencies": [ + { + "name": "aws-sdk-cpp", + "features": ["s3"] + }, + "azure-storage-blobs-cpp", + "hdf5", + "curl", + "openssl" + ] +} From d2b4659cd0bc9594b99741a7b9796b6f48e52392 Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:25:45 -0600 Subject: [PATCH 07/18] Fix typo in workflow name --- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 73793da..227e2cd 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -1,4 +1,4 @@ -nname: Build, Test, and Publish +name: Build, Test, and Publish on: pull_request: From f6634e17818bff3f37f7f1affc77dcda69d234ed Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:29:03 -0600 Subject: [PATCH 08/18] Update Linux system dependencies in workflow Added curl, zip, unzip, and tar to the Linux dependencies. 
--- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 227e2cd..c051f42 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -58,7 +58,7 @@ jobs: - name: Install system deps (Linux container) if: runner.os == 'Linux' - run: yum install -y cmake ninja-build + run: yum install -y cmake ninja-build curl zip unzip tar - name: Install system deps (macOS) if: runner.os == 'macOS' From 69e8875464caf6a18e7358f67ff57b68011da3c0 Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:33:34 -0600 Subject: [PATCH 09/18] Correct h5py HDF5 discovery command Fix the command to discover h5py HDF5 location in the CI workflow. --- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index c051f42..735bd18 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -96,7 +96,7 @@ jobs: - name: Discover h5py HDF5 location run: | - H5PY_HDF5_DIR=$(python3 -c " + H5PY_HDF5_DIR=$(${{ github.workspace }}/.venv/bin/python3 -c " import h5py, os d = os.path.dirname(h5py.__file__) dylibs = os.path.join(d, '.dylibs') From b1e7fcd77a84ccabe7ae1997b09bbf675583cda5 Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:37:36 -0600 Subject: [PATCH 10/18] Add debug step for virtual environment directory Added a debug step to check the contents of the virtual environment's bin directory. 
--- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 735bd18..ea33dfa 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -93,7 +93,9 @@ jobs: # ────────────────────────────────────────────── # 3. Discover h5py HDF5 + build wheel # ────────────────────────────────────────────── - + - name: Debug venv + run: ls -la ${{ github.workspace }}/.venv/bin/ || echo "No .venv/bin found" + - name: Discover h5py HDF5 location run: | H5PY_HDF5_DIR=$(${{ github.workspace }}/.venv/bin/python3 -c " From 214e659ce82e2c8637f7bb1728e7f6f482b95635 Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:43:31 -0600 Subject: [PATCH 11/18] Fix paths in GitHub Actions workflow for Python tools --- .../workflows/build-lib_array_morph-and-pypi-package.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index ea33dfa..1265703 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -67,7 +67,7 @@ jobs: - name: Install Python tools run: | uv venv --python ${{ matrix.python }} - echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH + echo "${GITHUB_WORKSPACE}/.venv/bin" >> $GITHUB_PATH uv pip install \ scikit-build-core setuptools-scm h5py \ build auditwheel delocate @@ -98,7 +98,7 @@ jobs: - name: Discover h5py HDF5 location run: | - H5PY_HDF5_DIR=$(${{ github.workspace }}/.venv/bin/python3 -c " + H5PY_HDF5_DIR=${GITHUB_WORKSPACE}/.venv/bin/python -c " import h5py, os d = 
os.path.dirname(h5py.__file__) dylibs = os.path.join(d, '.dylibs') @@ -106,11 +106,9 @@ jobs: print(dylibs if os.path.exists(dylibs) else libs) ") echo "H5PY_HDF5_DIR=$H5PY_HDF5_DIR" >> $GITHUB_ENV - echo "Discovered h5py HDF5 at: $H5PY_HDF5_DIR" - ls -la "$H5PY_HDF5_DIR" - name: Build wheel - run: uv build --wheel --no-build-isolation --python ${{ github.workspace }}/.venv/bin/python + run: uv build --wheel --no-build-isolation --python ${GITHUB_WORKSPACE}/.venv/bin/python # ────────────────────────────────────────────── # 4. Repair wheel for PyPI From 9fa5ba9fdc2f8d817bf46477bdae1c13e48a265e Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:47:40 -0600 Subject: [PATCH 12/18] Refactor h5py HDF5 discovery in workflow Removed debug step for checking virtual environment and updated h5py HDF5 discovery command. --- .../workflows/build-lib_array_morph-and-pypi-package.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 1265703..4a5ea84 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -92,13 +92,10 @@ jobs: # ────────────────────────────────────────────── # 3. 
Discover h5py HDF5 + build wheel - # ────────────────────────────────────────────── - - name: Debug venv - run: ls -la ${{ github.workspace }}/.venv/bin/ || echo "No .venv/bin found" - + # ────────────────────────────────────────────── - name: Discover h5py HDF5 location run: | - H5PY_HDF5_DIR=${GITHUB_WORKSPACE}/.venv/bin/python -c " + H5PY_HDF5_DIR=$(${GITHUB_WORKSPACE}/.venv/bin/python -c " import h5py, os d = os.path.dirname(h5py.__file__) dylibs = os.path.join(d, '.dylibs') From 5ef793bf611b885d60594c25317283ff9ded66e6 Mon Sep 17 00:00:00 2001 From: Carlos Guzman <42706936+guzman109@users.noreply.github.com> Date: Thu, 26 Feb 2026 19:51:41 -0600 Subject: [PATCH 13/18] Update vcpkg path variables in workflow YAML --- .../workflows/build-lib_array_morph-and-pypi-package.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 4a5ea84..3c5d7b0 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -78,10 +78,10 @@ jobs: - name: Bootstrap vcpkg run: | - git clone https://github.com/microsoft/vcpkg.git ${{ github.workspace }}/vcpkg - ${{ github.workspace }}/vcpkg/bootstrap-vcpkg.sh - echo "VCPKG_ROOT=${{ github.workspace }}/vcpkg" >> $GITHUB_ENV - echo "CMAKE_TOOLCHAIN_FILE=${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV + git clone https://github.com/microsoft/vcpkg.git ${GITHUB_WORKSPACE}/vcpkg + ${GITHUB_WORKSPACE}/vcpkg/bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${GITHUB_WORKSPACE}/vcpkg" >> $GITHUB_ENV + echo "CMAKE_TOOLCHAIN_FILE=${GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV - name: Cache vcpkg packages uses: actions/cache@v4 From da2a6f25507a50ddada955a4384e2aba8051dcc4 Mon Sep 17 00:00:00 2001 From: Carlos Guzman Date: Thu, 26 Feb 2026 20:01:22 -0600 Subject: 
[PATCH 14/18] Removed openssl and curl as dependencies. Using system installed versions instead. Issues with github actions trying to build openssl from bottom up. --- .github/workflows/build-lib_array_morph-and-pypi-package.yaml | 4 ++-- lib/vcpkg.json | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 3c5d7b0..44c99da 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -58,11 +58,11 @@ jobs: - name: Install system deps (Linux container) if: runner.os == 'Linux' - run: yum install -y cmake ninja-build curl zip unzip tar + run: yum install -y cmake ninja-build curl zip unzip tar openssl-devel - name: Install system deps (macOS) if: runner.os == 'macOS' - run: brew install ninja cmake + run: brew install ninja cmake curl openssl - name: Install Python tools run: | diff --git a/lib/vcpkg.json b/lib/vcpkg.json index f1685ab..abea6d8 100644 --- a/lib/vcpkg.json +++ b/lib/vcpkg.json @@ -7,8 +7,6 @@ "features": ["s3"] }, "azure-storage-blobs-cpp", - "hdf5", - "curl", - "openssl" + "hdf5" ] } From 3a2e810584b483bb25161f534b353bbeaf28fb15 Mon Sep 17 00:00:00 2001 From: Carlos Guzman Date: Thu, 26 Feb 2026 20:15:19 -0600 Subject: [PATCH 15/18] Adding missing system deps in manylinux --- ...build-lib_array_morph-and-pypi-package.yaml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 44c99da..9b56b9b 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -56,9 +56,23 @@ jobs: # 1. 
System deps # ────────────────────────────────────────────── - - name: Install system deps (Linux container) + - name: Install system deps (Linux manylinux) if: runner.os == 'Linux' - run: yum install -y cmake ninja-build curl zip unzip tar openssl-devel + run: | + yum -y install \ + git curl ca-certificates \ + zip unzip tar \ + cmake ninja-build pkgconfig \ + gcc gcc-c++ make \ + perl perl-IPC-Cmd perl-ExtUtils-MakeMaker \ + kernel-headers + + - name: Sanity check compilers + if: runner.os == 'Linux' + run: | + gcc --version + g++ --version + perl -MIPC::Cmd -e 'print "IPC::Cmd OK\n"' - name: Install system deps (macOS) if: runner.os == 'macOS' From 6a80f46a0115a3f834bd0a33526bff81a3c7db8a Mon Sep 17 00:00:00 2001 From: Carlos Guzman Date: Fri, 27 Feb 2026 11:24:07 -0600 Subject: [PATCH 16/18] Readme updated to reflect new build and python package. Cleaned up justfiles and workflows. --- ...uild-lib_array_morph-and-pypi-package.yaml | 8 +- README.md | 349 ++++++++++++++---- justfile | 16 +- lib/CMakeLists.txt | 16 +- lib/README.md | 71 +++- lib/justfile | 16 +- pyproject.toml | 2 +- 7 files changed, 359 insertions(+), 119 deletions(-) diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml index 9b56b9b..47c01f9 100644 --- a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml +++ b/.github/workflows/build-lib_array_morph-and-pypi-package.yaml @@ -109,14 +109,14 @@ jobs: # ────────────────────────────────────────────── - name: Discover h5py HDF5 location run: | - H5PY_HDF5_DIR=$(${GITHUB_WORKSPACE}/.venv/bin/python -c " + HDF5_DIR=$(${GITHUB_WORKSPACE}/.venv/bin/python -c " import h5py, os d = os.path.dirname(h5py.__file__) dylibs = os.path.join(d, '.dylibs') libs = os.path.join(os.path.dirname(d), 'h5py.libs') print(dylibs if os.path.exists(dylibs) else libs) ") - echo "H5PY_HDF5_DIR=$H5PY_HDF5_DIR" >> $GITHUB_ENV + echo "HDF5_DIR=$HDF5_DIR" >> $GITHUB_ENV - name: Build 
wheel run: uv build --wheel --no-build-isolation --python ${GITHUB_WORKSPACE}/.venv/bin/python @@ -128,7 +128,7 @@ jobs: - name: Repair wheel (Linux) if: runner.os == 'Linux' run: | - export LD_LIBRARY_PATH="${H5PY_HDF5_DIR}:${LD_LIBRARY_PATH}" + export LD_LIBRARY_PATH="${HDF5_DIR}:${LD_LIBRARY_PATH}" auditwheel show dist/*.whl auditwheel repair dist/*.whl -w wheelhouse/ \ --exclude libhdf5.so \ @@ -139,7 +139,7 @@ jobs: - name: Repair wheel (macOS) if: runner.os == 'macOS' run: | - export DYLD_LIBRARY_PATH="${H5PY_HDF5_DIR}:${DYLD_LIBRARY_PATH}" + export DYLD_LIBRARY_PATH="${HDF5_DIR}:${DYLD_LIBRARY_PATH}" delocate-listdeps dist/*.whl delocate-wheel -w wheelhouse/ dist/*.whl \ --exclude libhdf5 \ diff --git a/README.md b/README.md index 5b6e61f..d1ac0a6 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build Status](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yml/badge.svg)](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yml) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -ArrayMorph is a software to manage array data stored on cloud object storage efficiently. It supports both HDF5 C++ API and h5py API. The data returned by h5py API is numpy arrays. By using h5py API, users can access array data stored on the cloud and feed the read data into machine learning pipelines seamlessly. +ArrayMorph enables efficient storage and retrieval of array data from cloud object stores, supporting AWS S3 and Azure Blob Storage. It is an HDF5 Virtual Object Layer (VOL) plugin that transparently routes HDF5 file operations to cloud storage — existing h5py or HDF5 C++ code works unchanged once the plugin is loaded. **Tag**: CI4AI @@ -11,119 +11,320 @@ ArrayMorph is a software to manage array data stored on cloud object storage eff # How-To Guides -## Install dependencies +## Install ArrayMorph -It is recommended to use Conda (and conda-forge) for managing dependencies. 
+```bash +pip install arraymorph +``` + +Once installed, jump straight to [Configure credentials for AWS S3](#configure-credentials-for-aws-s3) or [Azure](#configure-credentials-for-azure-blob-storage) below. + +If you need the standalone `lib_arraymorph` binary, you can [download a pre-built release](#download-a-pre-built-lib_arraymorph) or [build from source](#build-from-source). -1. Install [Miniconda](https://docs.anaconda.com/miniconda/) -2. Install [conda-build](https://docs.conda.io/projects/conda-build/en/stable/install-conda-build.html) for installing local conda packages -3. Create and activate environment with dependencies: - ```bash - conda create -n arraymorph conda-forge::gxx=9 - conda activate arraymorph - conda install -n arraymorph cmake conda-forge::hdf5=1.14.2 conda-forge::aws-sdk-cpp conda-forge::azure-storage-blobs-cpp conda-forge::h5py - ``` +## Configure credentials for AWS S3 -## Install ArrayMorph via ArrayMorph local conda package - ```bash - git clone https://github.com/ICICLE-ai/arraymorph.git - cd arraymorph/arraymorph_channel - conda index . - conda install -n arraymorph arraymorph -c file://$(pwd) -c conda-forge - ``` +Use the Python API before opening any HDF5 files: + +```python +import arraymorph + +arraymorph.configure_s3( + bucket="my-bucket", + access_key="MY_ACCESS_KEY", + secret_key="MY_SECRET_KEY", + region="us-east-1", + use_tls=True, +) +arraymorph.enable() +``` -## Install ArryMorph from source code +Or set environment variables directly: -### Build ArrayMorph ```bash -git clone https://github.com/ICICLE-ai/arraymorph.git -cd arraymorph/arraymorph -cmake -B ./build -S . 
-DCMAKE_PREFIX_PATH=$CONDA_PREFIX -cd build -make +export STORAGE_PLATFORM=S3 +export BUCKET_NAME=my-bucket +export AWS_ACCESS_KEY_ID=MY_ACCESS_KEY +export AWS_SECRET_ACCESS_KEY=MY_SECRET_KEY +export AWS_REGION=us-east-1 +export HDF5_PLUGIN_PATH=$(python -c "import arraymorph; print(arraymorph.get_plugin_path())") +export HDF5_VOL_CONNECTOR=arraymorph +``` + +## Configure credentials for Azure Blob Storage + +```python +import arraymorph + +arraymorph.configure_azure( + container="my-container", + connection_string="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net", +) +arraymorph.enable() ``` -### Enable VOL plugin: +Or set environment variables directly: + ```bash -export HDF5_PLUGIN_PATH=/path/to/arraymorph/arraymorph/build/src +export STORAGE_PLATFORM=Azure +export BUCKET_NAME=my-container +export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;..." +export HDF5_PLUGIN_PATH=$(python -c "import arraymorph; print(arraymorph.get_plugin_path())") export HDF5_VOL_CONNECTOR=arraymorph ``` -## Configure Environment for Cloud Access +## Use an S3-compatible object store (MinIO, Ceph, Garage) + +Pass `endpoint`, `addressing_style=True`, and `use_signed_payloads=True` to match the requirements of most self-hosted S3-compatible stores: + +```python +import arraymorph + +arraymorph.configure_s3( + bucket="my-bucket", + access_key="MY_ACCESS_KEY", + secret_key="MY_SECRET_KEY", + endpoint="http://localhost:9000", + region="us-east-1", + use_tls=False, + addressing_style=True, + use_signed_payloads=True, +) +arraymorph.enable() +``` + +## Download a pre-built lib_arraymorph + +Each [GitHub release](https://github.com/ICICLE-ai/ArrayMorph/releases) attaches standalone pre-compiled binaries of `lib_arraymorph` for all supported platforms: + +| File | Platform | +| ---------------------------------- | ------------------- | +| `lib_arraymorph-linux-x86_64.so` | Linux x86_64 | +| `lib_arraymorph-linux-aarch64.so` | Linux 
aarch64 | +| `lib_arraymorph-macos-arm64.dylib` | macOS Apple Silicon | + +Download the file for your platform from the release assets and set `HDF5_PLUGIN_PATH` to the directory containing it before calling `arraymorph.enable()` or setting `HDF5_VOL_CONNECTOR` manually. + +## Build from source + +Use this path if you want to compile `lib_arraymorph` yourself — for example to target a specific platform, contribute changes, or build a custom wheel. + +### Prerequisites + +- [vcpkg](https://github.com/microsoft/vcpkg) — installs the AWS and Azure C++ SDKs via CMake +- [CMake](https://cmake.org) and [Ninja](https://ninja-build.org) +- [uv](https://docs.astral.sh/uv/) — Python package manager + +### Step 1 — Clone and create a virtual environment -### AWS Configuration: ```bash -export STORAGE_PLATFORM=S3 -export BUCKET_NAME=XXXXXX -export AWS_ACCESS_KEY_ID=XXXXXX -export AWS_SECRET_ACCESS_KEY=XXXXXX -export AWS_REGION=us-east-2 # or your bucket's region +git clone https://github.com/ICICLE-ai/ArrayMorph.git +cd ArrayMorph +uv venv +source .venv/bin/activate +``` + +### Step 2 — Install h5py + +`lib_arraymorph` links against an HDF5 shared library at build time. Rather than requiring a separate system-wide HDF5 installation, the build system points CMake at the `.so` / `.dylib` that h5py already bundles. Install h5py first so those libraries are present: + +```bash +uv pip install h5py ``` -### Azure Configuration: +On macOS the bundled libraries land in `.venv/lib/python*/site-packages/h5py/.dylibs/`; on Linux in `.venv/lib/python*/site-packages/h5py.libs/`. 
+ +### Step 3 — Configure and build the shared library + ```bash -export STORAGE_PLATFORM=Azure -export BUCKET_NAME=XXXXXX -export AZURE_STORAGE_CONNECTION_STRING=XXXXXX +export HDF5_DIR=$(.venv/bin/python -c "import h5py,os; d=os.path.dirname(h5py.__file__); print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))") + +cmake -B lib/build -S lib \ + -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -G Ninja + +cmake --build lib/build +``` + +This produces `lib/build/lib_arraymorph.dylib` on macOS or `lib/build/lib_arraymorph.so` on Linux. + +### Optional — Python package + +If you also want to use the Python API, install the package in editable mode: + +```bash +HDF5_DIR=$HDF5_DIR \ +CMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \ +uv pip install -e . +``` + +Or build a redistributable wheel: + +```bash +HDF5_DIR=$HDF5_DIR \ +CMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \ +uv build --wheel --no-build-isolation +``` + +The wheel is written to `dist/`. Install it in any environment with: + +```bash +pip install dist/arraymorph-*.whl ``` --- # Tutorials -## Run a simple example: Writing and Reading HDF5 files from Cloud - -### Prerequisites: -- AWS or Azure cloud account with credentials -- S3 bucket or Azure container -- ArrayMorph dependencies installed - -### Steps: -1. Activate conda environment - ```bash - conda activate arraymorph - ``` - -2. Write sample HDF5 data to the cloud - ```bash - cd examples/python - python3 write.py - ``` - -3. Read data back from cloud HDF5 file - ```bash - cd examples/python - python3 read.py - ``` +## Write and read a chunked array on AWS S3 + +This tutorial walks through writing a 2-D NumPy array to a cloud HDF5 file and reading a slice of it back. 
+ +### Prerequisites + +- An AWS account with an S3 bucket, or an S3-compatible object store +- ArrayMorph installed (`pip install arraymorph`) + +### Step 1 — Configure and enable ArrayMorph + +```python +import arraymorph + +arraymorph.configure_s3( + bucket="my-bucket", + access_key="MY_ACCESS_KEY", + secret_key="MY_SECRET_KEY", + region="us-east-1", + use_tls=True, +) +arraymorph.enable() +``` + +`arraymorph.enable()` sets `HDF5_PLUGIN_PATH` and `HDF5_VOL_CONNECTOR` in the current process. Any `h5py.File(...)` call made after this point is routed through ArrayMorph. + +### Step 2 — Write array data + +```python +import h5py +import numpy as np + +data = np.fromfunction(lambda i, j: i + j, (100, 100), dtype="i4") + +with h5py.File("demo.h5", "w") as f: + f.create_dataset("values", data=data, chunks=(10, 10)) +``` + +Each 10×10 chunk is stored as a separate object in your S3 bucket. + +### Step 3 — Read a slice back + +```python +import h5py + +with h5py.File("demo.h5", "r") as f: + dset = f["values"] + print(dset.dtype) # int32 + print(dset[5:15, 5:15]) # fetches only the chunks that overlap this slice +``` + +Only the chunks that overlap the requested hyperslab are fetched from cloud storage — no full-file download occurs. + --- # Explanation -### How ArrayMorph Works +## How ArrayMorph works -ArrayMorph plugs into the HDF5 stack using a VOL (Virtual Object Layer) plugin that intercepts file operations and routes them to cloud object storage instead of local files. This allows existing HDF5 APIs (both C++ and h5py in Python) to operate on cloud-based data seamlessly, enabling transparent cloud access for scientific or ML pipelines. +ArrayMorph is implemented as an HDF5 **Virtual Object Layer (VOL)** connector. The VOL is an abstraction layer inside the HDF5 library that separates the public API from the storage implementation. 
By providing a plugin that registers itself as a VOL connector, ArrayMorph intercepts every HDF5 file operation before it reaches the native POSIX layer. -It supports: -- Cloud backends: AWS S3 and Azure Blob -- File formats: Current binary data stream (we plan to extend to other formats like jpg in the future) -- Languages: C++ and Python (via h5py compatibility) +When `arraymorph.enable()` is called: -The system is designed to be efficient in latency-sensitive scenarios and aims to integrate well with large-scale distributed training and inference. +1. `HDF5_PLUGIN_PATH` is set to the directory containing the compiled shared library (`lib_arraymorph.so` / `lib_arraymorph.dylib`). +2. `HDF5_VOL_CONNECTOR=arraymorph` tells HDF5 to load and activate that plugin for all subsequent file operations. + +From this point, a call like `h5py.File("demo.h5", "w")` does not touch the local filesystem. Instead, the VOL connector: + +1. Reads cloud credentials from environment variables and constructs an AWS S3 or Azure Blob client (selected by `STORAGE_PLATFORM`). +2. On dataset read/write, translates the HDF5 hyperslab selection into a list of chunks and dispatches asynchronous get/put requests against the object store — one object per chunk. + +### Chunked storage model + +HDF5 datasets are divided into fixed-size chunks (e.g. `chunks=(64, 64)` for a 2-D dataset). ArrayMorph stores each chunk as an independent object in the bucket. The object key encodes the dataset path and chunk coordinates, so a partial read only fetches the chunks that overlap the requested slice. For large chunks, ArrayMorph can issue byte-range requests to retrieve only the needed bytes within a chunk object. + +### Async I/O + +Both the S3 and Azure backends use asynchronous operations dispatched to a thread pool. This allows ArrayMorph to fetch multiple chunks in parallel, which is important for workloads that access many chunks per read (e.g. 
strided access patterns in machine learning data loaders). + +### Compatibility + +Because the interception happens at the VOL layer, no changes to application code are required. Any program that opens HDF5 files with h5py or the HDF5 C++ API will automatically use ArrayMorph once the plugin is loaded. --- -## References +# References + +## Python API + +### `arraymorph.enable() -> None` + +Sets `HDF5_PLUGIN_PATH` and `HDF5_VOL_CONNECTOR` in the current process environment. Must be called before any `h5py.File(...)` call. + +### `arraymorph.get_plugin_path() -> str` + +Returns the directory containing the compiled VOL plugin. Useful when you need to set `HDF5_PLUGIN_PATH` manually. + +### `arraymorph.configure_s3(bucket, access_key, secret_key, endpoint=None, region="us-east-2", use_tls=False, addressing_style=False, use_signed_payloads=False) -> None` + +Configures the S3 client. All parameters are written to environment variables consumed by the C++ plugin at file-open time. + +| Parameter | Environment variable | Default | Description | +| --------------------- | ------------------------- | ----------- | ---------------------------------------------------- | +| `bucket` | `BUCKET_NAME` | — | S3 bucket name | +| `access_key` | `AWS_ACCESS_KEY_ID` | — | Access key ID | +| `secret_key` | `AWS_SECRET_ACCESS_KEY` | — | Secret access key | +| `endpoint` | `AWS_ENDPOINT_URL_S3` | AWS default | Custom endpoint for S3-compatible stores | +| `region` | `AWS_REGION` | `us-east-2` | SigV4 signing region | +| `use_tls` | `AWS_USE_TLS` | `false` | Use HTTPS when `True` | +| `addressing_style` | `AWS_S3_ADDRESSING_STYLE` | `virtual` | `path` when `True`; required for most non-AWS stores | +| `use_signed_payloads` | `AWS_SIGNED_PAYLOADS` | `false` | Include request body in SigV4 signature | + +### `arraymorph.configure_azure(container, connection_string=None) -> None` + +Configures the Azure Blob client. 
+ +| Parameter | Environment variable | Default | Description | +| ------------------- | --------------------------------- | -------- | ------------------------------- | +| `container` | `BUCKET_NAME` | — | Azure container name | +| `connection_string` | `AZURE_STORAGE_CONNECTION_STRING` | From env | Azure Storage connection string | + +## Environment variables + +All configuration can be applied via environment variables without using the Python API. This is useful when running HDF5 C++ programs directly. + +| Variable | Description | +| --------------------------------- | --------------------------------------------------- | +| `HDF5_PLUGIN_PATH` | Directory containing `lib_arraymorph.so` / `.dylib` | +| `HDF5_VOL_CONNECTOR` | Must be `arraymorph` to activate the plugin | +| `STORAGE_PLATFORM` | `S3` (default) or `Azure` | +| `BUCKET_NAME` | Bucket or container name | +| `AWS_ACCESS_KEY_ID` | S3 access key | +| `AWS_SECRET_ACCESS_KEY` | S3 secret key | +| `AWS_REGION` | SigV4 signing region | +| `AWS_ENDPOINT_URL_S3` | Custom S3-compatible endpoint URL | +| `AWS_USE_TLS` | `true` / `false` | +| `AWS_S3_ADDRESSING_STYLE` | `path` or `virtual` | +| `AWS_SIGNED_PAYLOADS` | `true` / `false` | +| `AZURE_STORAGE_CONNECTION_STRING` | Azure connection string | + +## External references - [HDF5 VOL connectors](https://docs.hdfgroup.org/hdf5/develop/_v_o_l.html) - [AWS SDK for C++](https://github.com/aws/aws-sdk-cpp) - [Azure SDK for C++](https://github.com/Azure/azure-sdk-for-cpp) - [h5py documentation](https://docs.h5py.org/en/stable/) -- [conda-forge](https://conda-forge.org/) --- ## Acknowledgements -This project is supported by: - -*National Science Foundation (NSF) funded AI institute for Intelligent Cyberinfrastructure with Computational Learning in the Environment (ICICLE) (OAC 2112606)* +This project is supported by the National Science Foundation (NSF) funded AI institute for Intelligent Cyberinfrastructure with Computational Learning in the Environment (ICICLE) 
(OAC 2112606). diff --git a/justfile b/justfile index befe9db..7a09ea9 100644 --- a/justfile +++ b/justfile @@ -7,7 +7,7 @@ set export := true # --- Variables --- VCPKG_TOOLCHAIN := env("VCPKG_ROOT", home_directory() / ".vcpkg") / "scripts/buildsystems/vcpkg.cmake" -H5PY_HDF5_DIR := `./.venv/bin/python3 -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` +HDF5_DIR := `./.venv/bin/python3 -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` # --- Recipes --- @@ -15,24 +15,20 @@ H5PY_HDF5_DIR := `./.venv/bin/python3 -c "import h5py,os;d=os.path.dirname(h5py. default: @just --list -# Install C++ dependencies via vcpkg -deps: - cd lib && vcpkg install - # Build Python wheel (scikit-build-core handles CMake) wheel: CMAKE_TOOLCHAIN_FILE={{ VCPKG_TOOLCHAIN }} \ - H5PY_HDF5_DIR={{ H5PY_HDF5_DIR }} \ - uv build --wheel --no-build-isolation + HDF5_DIR={{ HDF5_DIR }} \ + uv build --wheel --no-build-isolation # Install editable into current venv (for development iteration) dev: CMAKE_TOOLCHAIN_FILE={{ VCPKG_TOOLCHAIN }} \ - H5PY_HDF5_DIR={{ H5PY_HDF5_DIR }} \ + HDF5_DIR={{ HDF5_DIR }} \ uv pip install -e . 
# Full build from scratch: deps → wheel -build: deps wheel +build: wheel # Test the built wheel in an isolated venv test: @@ -56,5 +52,5 @@ rebuild: clean build # Show current env var values (for debugging) info: @echo "CMAKE_TOOLCHAIN_FILE: {{ VCPKG_TOOLCHAIN }}" - @echo "H5PY_HDF5_DIR: {{ H5PY_HDF5_DIR }}" + @echo "HDF5_DIR: {{ HDF5_DIR }}" @echo "Plugin lib: $(find lib/build -name 'lib_array_morph*' 2>/dev/null || echo 'not built')" diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 86e5a60..51630db 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -41,15 +41,15 @@ find_package(OpenSSL REQUIRED) # duplicate symbol conflicts. vcpkg provides headers, h5py provides # the shared library. -# Accept H5PY_HDF5_DIR from either CMake variable (-D) or env var -if(NOT H5PY_HDF5_DIR AND DEFINED ENV{H5PY_HDF5_DIR}) - set(H5PY_HDF5_DIR "$ENV{H5PY_HDF5_DIR}") +# Accept HDF5_DIR from either CMake variable (-D) or env var +if(NOT HDF5_DIR AND DEFINED ENV{HDF5_DIR}) + set(HDF5_DIR "$ENV{HDF5_DIR}") endif() -if(NOT H5PY_HDF5_DIR) +if(NOT HDF5_DIR) message(FATAL_ERROR - "H5PY_HDF5_DIR not set. Run:\n" - " export H5PY_HDF5_DIR=$(python3 -c \"" + "HDF5_DIR not set. 
Run:\n" + " export HDF5_DIR=$(python3 -c \"" "import h5py, os; " "d=os.path.dirname(h5py.__file__); " "print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) " @@ -57,7 +57,7 @@ if(NOT H5PY_HDF5_DIR) ) endif() -set(H5PY_LIB_DIR "${H5PY_HDF5_DIR}") +set(H5PY_LIB_DIR "${HDF5_DIR}") # Find HDF5 headers (via vcpkg) find_package(HDF5 REQUIRED COMPONENTS C) @@ -72,7 +72,7 @@ if(NOT _h5py_hdf5_libs) # List what's actually in the directory for debugging file(GLOB _h5py_dir_contents "${H5PY_LIB_DIR}/*") message(FATAL_ERROR - "No HDF5 shared library found in H5PY_HDF5_DIR=${H5PY_LIB_DIR}\n" + "No HDF5 shared library found in HDF5_DIR=${H5PY_LIB_DIR}\n" "Directory contents: ${_h5py_dir_contents}\n" "Expected libhdf5*.dylib (macOS) or libhdf5*.so* (Linux)" ) diff --git a/lib/README.md b/lib/README.md index 958f219..d90c9bb 100644 --- a/lib/README.md +++ b/lib/README.md @@ -1,17 +1,64 @@ -# Building ArrayMorph into a conda package +# lib — ArrayMorph C++ shared library -This folder contains the ArrayMorph source code (./src/), the CMake file to build ArrayMorph (CMakeLists.txt) and the Conda build recipes (build.sh, meta.yaml). +This directory contains the C++ source code and CMake build system for `lib_arraymorph`, the HDF5 VOL connector plugin. -## Build ArrayMorph conda package +## Directory layout -1. Install [Miniconda](https://docs.anaconda.com/miniconda/) -2. Install [conda-build](https://docs.conda.io/projects/conda-build/en/stable/install-conda-build.html) -3. Update conda and conda-build -4. Under the current folder, build ArrayMorph conda pacakge - ```bash - conda build -c conda-forge . 
- ``` +``` +lib/ +├── src/ # C++ source files +├── include/ # Public headers +├── CMakeLists.txt # CMake build definition +└── vcpkg.json # vcpkg dependency manifest (AWS SDK, Azure SDK) +``` -## Get ArrayMorph conda package +## Download a pre-built binary -ArrayMorph conda package is stored in /path/to/conda/conda-bld/linux-64/ +Each [GitHub release](https://github.com/ICICLE-ai/ArrayMorph/releases) attaches standalone pre-compiled binaries — no build toolchain required: + +| File | Platform | +|---|---| +| `lib_arraymorph-linux-x86_64.so` | Linux x86_64 | +| `lib_arraymorph-linux-aarch64.so` | Linux aarch64 | +| `lib_arraymorph-macos-arm64.dylib` | macOS Apple Silicon | + +Download the file for your platform from the release assets and point `HDF5_PLUGIN_PATH` at the containing directory. + +The standalone binary still requires an HDF5 shared library at runtime. Set `LD_LIBRARY_PATH` (Linux) or `DYLD_LIBRARY_PATH` (macOS) to the directory containing `libhdf5.so` / `libhdf5.dylib` before loading the plugin. + +## Prerequisites + +- [vcpkg](https://github.com/microsoft/vcpkg) — installs the AWS and Azure C++ SDKs via CMake +- [CMake](https://cmake.org) and [Ninja](https://ninja-build.org) +- HDF5 shared library (`.so` / `.dylib`) — set `HDF5_DIR` to the directory containing it + +## Build + +```bash +cmake -B build -S . \ + -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -G Ninja + +cmake --build build +``` + +This produces `build/lib_arraymorph.dylib` on macOS or `build/lib_arraymorph.so` on Linux. + +### Locating HDF5 + +`lib_arraymorph` must link against an existing HDF5 shared library. 
Set `HDF5_DIR` to the directory containing the HDF5 `.so` / `.dylib` before running CMake: + +```bash +export HDF5_DIR=/path/to/hdf5/lib +``` + +If you have h5py installed in a Python environment, you can point directly at its bundled libraries: + +```bash +# macOS +export HDF5_DIR=/path/to/.venv/lib/python3.x/site-packages/h5py/.dylibs + +# Linux +export HDF5_DIR=/path/to/.venv/lib/python3.x/site-packages/h5py.libs +``` diff --git a/lib/justfile b/lib/justfile index 6ec73a1..4a56ffd 100644 --- a/lib/justfile +++ b/lib/justfile @@ -6,7 +6,7 @@ VCPKG_TOOLCHAIN := env("VCPKG_ROOT", home_directory() / ".vcpkg") / "scripts/bui # Set this to the path where H5Py installs HDF5 binary. On macOS its .dylib, Linux .so, Windows .dll -H5PY_HDF5_DIR := `../.venv/bin/python -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` +HDF5_DIR := `../.venv/bin/python -c "import h5py,os;d=os.path.dirname(h5py.__file__);print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))"` # Settings set dotenv-load := true @@ -16,13 +16,9 @@ set export := true default: @just --list -# 1. Install dependencies via vcpkg -deps: - vcpkg install - -# 2. Setup the CMake build environment +# Setup the CMake build environment setup: - export H5PY_HDF5_DIR={{ H5PY_HDF5_DIR }}; \ + export HDF5_DIR={{ HDF5_DIR }}; \ cmake -B {{ BUILD_DIR }} -S . \ -DCMAKE_TOOLCHAIN_FILE={{ VCPKG_TOOLCHAIN }} \ -DCMAKE_BUILD_TYPE=Release \ @@ -31,10 +27,10 @@ setup: build: cmake --build {{ BUILD_DIR }} -# 5. Full build from scratch -full-build: deps setup build +# Full build from scratch +full-build: setup build -# 6. 
Clean all build and dependency artifacts +# Clean all build and dependency artifacts clean: rm -rf {{ BUILD_DIR }} vcpkg_installed diff --git a/pyproject.toml b/pyproject.toml index 50deb06..929b4c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" [tool.scikit-build.cmake.define] CMAKE_POSITION_INDEPENDENT_CODE = "ON" CMAKE_TOOLCHAIN_FILE = { env = "CMAKE_TOOLCHAIN_FILE", default = "" } -H5PY_HDF5_DIR = { env = "H5PY_HDF5_DIR", default = "" } +HDF5_DIR = { env = "HDF5_DIR", default = "" } [tool.setuptools_scm] local_scheme = "no-local-version" From 240fa595a41f66ea47cf5bd744b0c04f88b46c7a Mon Sep 17 00:00:00 2001 From: Carlos Guzman Date: Fri, 27 Feb 2026 11:27:51 -0600 Subject: [PATCH 17/18] Replacing old build CI --- ...morph-and-pypi-package.yaml => build.yaml} | 0 .github/workflows/build.yml | 62 ------------------- README.md | 2 +- 3 files changed, 1 insertion(+), 63 deletions(-) rename .github/workflows/{build-lib_array_morph-and-pypi-package.yaml => build.yaml} (100%) delete mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build-lib_array_morph-and-pypi-package.yaml b/.github/workflows/build.yaml similarity index 100% rename from .github/workflows/build-lib_array_morph-and-pypi-package.yaml rename to .github/workflows/build.yaml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index de374ff..0000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: Build ArrayMorph - -on: - push: - branches: - - main - paths: - - arraymorph/** - - .github/workflows/** - pull_request: - branches: - - main - -jobs: - build: - runs-on: ubuntu-latest - - env: - VCPKG_ROOT: ${{ github.workspace }}/vcpkg - HDF5_INSTALL: ${{ github.workspace }}/HDF5 - - steps: - - name: Checkout source - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y 
build-essential pkg-config python3 python3-pip libssl-dev cmake
-
-      - name: Install vcpkg
-        run: |
-          git clone https://github.com/microsoft/vcpkg.git $VCPKG_ROOT
-          cd $VCPKG_ROOT
-          ./bootstrap-vcpkg.sh
-          ./vcpkg install aws-sdk-cpp[s3]:x64-linux
-          rm -rf vcpkg/buildtrees vcpkg/downloads vcpkg/packages
-          ./vcpkg install azure-storage-blobs-cpp:x64-linux
-          rm -rf vcpkg/buildtrees vcpkg/downloads vcpkg/packages
-
-      - name: Install HDF5
-        run: |
-          git clone https://github.com/HDFGroup/hdf5.git
-          cd hdf5
-          git checkout hdf5-1_14_2
-          ./configure --prefix=$HDF5_INSTALL --enable-cxx
-          make -j$(nproc)
-          make install
-          rm -rf hdf5
-
-      - name: Install h5py
-        run: |
-          python3 -m pip install --upgrade pip
-          HDF5_DIR=$HDF5_INSTALL pip3 install --no-binary=h5py h5py
-
-      - name: Build ArrayMorph
-        run: |
-          cd arraymorph
-          cmake -B ./build -S . \
-            -DCMAKE_PREFIX_PATH=$HDF5_INSTALL \
-            -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake
-          cmake --build ./build --parallel
\ No newline at end of file
diff --git a/README.md b/README.md
index d1ac0a6..18bc448 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # ArrayMorph
 
-[![Build Status](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yml/badge.svg)](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yml)
+[![Build Status](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yaml/badge.svg)](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yaml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 ArrayMorph enables efficient storage and retrieval of array data from cloud object stores, supporting AWS S3 and Azure Blob Storage. It is an HDF5 Virtual Object Layer (VOL) plugin that transparently routes HDF5 file operations to cloud storage — existing h5py or HDF5 C++ code works unchanged once the plugin is loaded. 
From 2777df0eef9396a6dd6bb9dda985e65f21c88702 Mon Sep 17 00:00:00 2001 From: Carlos Guzman Date: Fri, 27 Feb 2026 11:30:57 -0600 Subject: [PATCH 18/18] build triggers on push to main. --- .github/workflows/build.yaml | 100 ++++++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 47c01f9..1faaf57 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,6 +1,8 @@ name: Build, Test, and Publish on: + push: + branches: [main] pull_request: branches: [main] release: @@ -17,19 +19,91 @@ jobs: matrix: include: # Linux x86_64 (manylinux_2_28 container) - - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.9" } - - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.10" } - - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.11" } - - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.12" } - - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.13" } - - { os: linux, arch: x86_64, runner: ubuntu-latest, container: "quay.io/pypa/manylinux_2_28_x86_64", python: "3.14" } + - { + os: linux, + arch: x86_64, + runner: ubuntu-latest, + container: "quay.io/pypa/manylinux_2_28_x86_64", + python: "3.9", + } + - { + os: linux, + arch: x86_64, + runner: ubuntu-latest, + container: "quay.io/pypa/manylinux_2_28_x86_64", + python: "3.10", + } + - { + os: linux, + arch: x86_64, + runner: ubuntu-latest, + container: "quay.io/pypa/manylinux_2_28_x86_64", + python: "3.11", + } + - { + os: linux, + arch: x86_64, + runner: ubuntu-latest, + container: "quay.io/pypa/manylinux_2_28_x86_64", + python: "3.12", + } + - { + os: linux, + arch: x86_64, + runner: ubuntu-latest, + 
container: "quay.io/pypa/manylinux_2_28_x86_64", + python: "3.13", + } + - { + os: linux, + arch: x86_64, + runner: ubuntu-latest, + container: "quay.io/pypa/manylinux_2_28_x86_64", + python: "3.14", + } # Linux aarch64 (manylinux_2_28 container) - - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.9" } - - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.10" } - - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.11" } - - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.12" } - - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.13" } - - { os: linux, arch: aarch64, runner: ubuntu-24.04-arm, container: "quay.io/pypa/manylinux_2_28_aarch64", python: "3.14" } + - { + os: linux, + arch: aarch64, + runner: ubuntu-24.04-arm, + container: "quay.io/pypa/manylinux_2_28_aarch64", + python: "3.9", + } + - { + os: linux, + arch: aarch64, + runner: ubuntu-24.04-arm, + container: "quay.io/pypa/manylinux_2_28_aarch64", + python: "3.10", + } + - { + os: linux, + arch: aarch64, + runner: ubuntu-24.04-arm, + container: "quay.io/pypa/manylinux_2_28_aarch64", + python: "3.11", + } + - { + os: linux, + arch: aarch64, + runner: ubuntu-24.04-arm, + container: "quay.io/pypa/manylinux_2_28_aarch64", + python: "3.12", + } + - { + os: linux, + arch: aarch64, + runner: ubuntu-24.04-arm, + container: "quay.io/pypa/manylinux_2_28_aarch64", + python: "3.13", + } + - { + os: linux, + arch: aarch64, + runner: ubuntu-24.04-arm, + container: "quay.io/pypa/manylinux_2_28_aarch64", + python: "3.14", + } # macOS arm64 - { os: macos, arch: arm64, runner: macos-latest, python: "3.9" } - { os: macos, arch: arm64, runner: macos-latest, python: "3.10" } @@ -106,7 +180,7 @@ jobs: # 
────────────────────────────────────────────── # 3. Discover h5py HDF5 + build wheel - # ────────────────────────────────────────────── + # ────────────────────────────────────────────── - name: Discover h5py HDF5 location run: | HDF5_DIR=$(${GITHUB_WORKSPACE}/.venv/bin/python -c "