diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000000..7196b121ca --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,4 @@ +self-hosted-runner: + labels: + # Custom label for GPU-enabled self-hosted runners + - gpu \ No newline at end of file diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index e738561b9c..43dfe1619f 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -23,6 +23,10 @@ on: description: Whether to test using macOS type: boolean default: false + test_gpu: + description: Whether to test using CUDA-enabled PETSc + type: boolean + default: false deploy_website: description: Whether to deploy the website type: boolean @@ -54,6 +58,10 @@ on: description: Whether to test using macOS type: boolean default: false + test_gpu: + description: Whether to test using CUDA-enabled PETSc + type: boolean + default: false deploy_website: description: Whether to deploy the website type: boolean @@ -465,6 +473,141 @@ jobs: run: | find . -delete + test_gpu: + name: Build and test Firedrake (Linux CUDA) + runs-on: [self-hosted, Linux, gpu] + container: + image: ubuntu:latest + options: --gpus all + if: inputs.test_gpu + env: + OMPI_ALLOW_RUN_AS_ROOT: 1 + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 + OMP_NUM_THREADS: 1 + OPENBLAS_NUM_THREADS: 1 + FIREDRAKE_CI: 1 + PYOP2_SPMD_STRICT: 1 + # Disable fast math as it exposes compiler bugs + PYOP2_CFLAGS: -fno-fast-math + # NOTE: One should occasionally update test_durations.json by running + # 'make test_durations' inside a 'firedrake:latest' Docker image. 
+ EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50 + PYTEST_MPI_MAX_NPROCS: 8 + PETSC_OPTIONS: -use_gpu_aware_mpi 0 + EXTRA_OPTIONS: -use_gpu_aware_mpi 0 + steps: + - name: Confirm Nvidia GPUs are enabled + # The presence of the nvidia-smi command indicates that the Nvidia drivers have + # successfully been imported into the container, there is no point continuing + # if nvidia-smi is not present + run: nvidia-smi + + - name: Fix HOME + # For unknown reasons GitHub actions overwrite HOME to /github/home + # which will break everything unless fixed + # (https://github.com/actions/runner/issues/863) + run: echo "HOME=/root" >> "$GITHUB_ENV" + + + # Git is needed for actions/checkout and Python for firedrake-configure + # curl needed for adding new deb repositories to ubuntu + - name: Install system dependencies (1) + run: | + apt-get update + apt-get -y install git python3 curl + + + - name: Pre-run cleanup + # Make sure the current directory is empty + run: find . 
-delete + + - uses: actions/checkout@v5 + with: + path: firedrake-repo + ref: ${{ inputs.source_ref }} + + - name: Add Nvidia CUDA deb repositories + run: | + deburl=$( python3 ./firedrake-repo/scripts/firedrake-configure --show-extra-repo-pkg-url --gpu-arch cuda ) + debfile=$( basename "${deburl}" ) + curl -fsSLO "${deburl}" + dpkg -i "${debfile}" + apt-get update + + - name: Install system dependencies (2) + run: | + apt-get -y install \ + $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-system-packages) + apt-get -y install python3-venv + : # Dependencies needed to run the test suite + apt-get -y install fonts-dejavu graphviz graphviz-dev parallel poppler-utils + + - name: Install PETSc + run: | + if [ ${{ inputs.target_branch }} = 'release' ]; then + git clone --depth 1 \ + --branch $(python3 ./firedrake-repo/scripts/firedrake-configure --gpu-arch cuda --show-petsc-version) \ + https://gitlab.com/petsc/petsc.git + else + git clone --depth 1 https://gitlab.com/petsc/petsc.git + fi + cd petsc + python3 ../firedrake-repo/scripts/firedrake-configure \ + --arch default --gpu-arch cuda --show-petsc-configure-options | \ + xargs -L1 ./configure --with-make-np=4 + make + make check + { + echo "PETSC_DIR=/__w/firedrake/firedrake/petsc" + echo "PETSC_ARCH=arch-firedrake-default-cuda" + echo "SLEPC_DIR=/__w/firedrake/firedrake/petsc/arch-firedrake-default-cuda" + } >> "$GITHUB_ENV" + + - name: Install Firedrake + id: install + run: | + export $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-env) + python3 -m venv venv + . 
venv/bin/activate + + : # Empty the pip cache to ensure that everything is compiled from scratch + pip cache purge + + if [ ${{ inputs.target_branch }} = 'release' ]; then + EXTRA_PIP_FLAGS='' + else + : # Install build dependencies + pip install "$PETSC_DIR"/src/binding/petsc4py + pip install -r ./firedrake-repo/requirements-build.txt + + : # We have to pass '--no-build-isolation' to use a custom petsc4py + EXTRA_PIP_FLAGS='--no-build-isolation' + fi + + pip install --verbose $EXTRA_PIP_FLAGS \ + --no-binary h5py \ + './firedrake-repo[check]' + + firedrake-clean + pip list + + - name: Run smoke tests + run: | + . venv/bin/activate + firedrake-check + timeout-minutes: 10 + + - name: Verify GPU usage + run: | + . venv/bin/activate + export PETSC_OPTIONS="${PETSC_OPTIONS} -log_view_gpu_time -log_view" + python3 ./firedrake-repo/tests/firedrake/offload/test_poisson_offloading_pc.py + + - name: Post-run cleanup + if: always() + run: | + find . -delete + lint: name: Lint codebase runs-on: ubuntu-latest diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index a1430b57e7..6b63f97cae 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -12,4 +12,6 @@ jobs: target_branch: ${{ github.base_ref }} # Only run macOS tests if the PR is labelled 'macOS' test_macos: ${{ contains(github.event.pull_request.labels.*.name, 'macOS') }} + # Only run GPU tests if the PR is labelled 'gpu' + test_gpu: ${{ contains(github.event.pull_request.labels.*.name, 'gpu') }} secrets: inherit diff --git a/firedrake/__init__.py b/firedrake/__init__.py index e0009c2b0e..1b0a78640a 100644 --- a/firedrake/__init__.py +++ b/firedrake/__init__.py @@ -88,7 +88,7 @@ def init_petsc(): ASMLinesmoothPC, ASMExtrudedStarPC, AssembledPC, AuxiliaryOperatorPC, MassInvPC, PCDPC, PatchPC, PlaneSmoother, PatchSNES, P1PC, P1SNES, LORPC, GTMGPC, PMGPC, PMGSNES, HypreAMS, HypreADS, FDMPC, - PoissonFDMPC, TwoLevelPC, HiptmairPC, FacetSplitPC, BDDCPC + PoissonFDMPC, TwoLevelPC, HiptmairPC, 
FacetSplitPC, BDDCPC, OffloadPC ) from firedrake.mesh import ( # noqa: F401 Mesh, ExtrudedMesh, VertexOnlyMesh, RelabeledMesh, diff --git a/firedrake/preconditioners/__init__.py b/firedrake/preconditioners/__init__.py index eb37e4f61b..e8bf6e5f63 100644 --- a/firedrake/preconditioners/__init__.py +++ b/firedrake/preconditioners/__init__.py @@ -9,6 +9,7 @@ AssembledPC, AuxiliaryOperatorPC ) from firedrake.preconditioners.massinv import MassInvPC # noqa: F401 +from firedrake.preconditioners.offload import OffloadPC # noqa: F401 from firedrake.preconditioners.pcd import PCDPC # noqa: F401 from firedrake.preconditioners.patch import ( # noqa: F401 PatchPC, PlaneSmoother, PatchSNES diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py new file mode 100644 index 0000000000..57b898de79 --- /dev/null +++ b/firedrake/preconditioners/offload.py @@ -0,0 +1,99 @@ +from firedrake.preconditioners.assembled import AssembledPC +from firedrake.petsc import PETSc +from firedrake.utils import device_matrix_type +from firedrake.logging import logger +from functools import cache +import warnings + +import firedrake.dmhooks as dmhooks + +__all__ = ("OffloadPC",) + + +@cache +def offload_mat_type(pc_comm_rank) -> str | None: + mat_type = device_matrix_type() + if mat_type is None: + if pc_comm_rank == 0: + warnings.warn( + "This installation of Firedrake is not GPU-enabled, therefore OffloadPC " + "will do nothing. For this preconditioner to function correctly PETSc " + "will need to be rebuilt with some GPU capability (e.g. '--with-cuda=1')." + ) + return None + try: + dev = PETSc.Device.create() + except PETSc.Error: + if pc_comm_rank == 0: + logger.warning( + "This installation of Firedrake is GPU-enabled, but no GPU device has " + "been detected. 
OffloadPC will do nothing on this host" + ) + return None + if dev.getDeviceType() == "HOST": + raise RuntimeError( + "A GPU-enabled Firedrake build has been detected, and GPU hardware has been " + "detected but a GPU device was unable to be initialised." + ) + dev.destroy() + return mat_type + + +class OffloadPC(AssembledPC): + """Offload PC from CPU to GPU and back. + + Internally this makes a PETSc PC object that can be controlled by + options using the extra options prefix ``offload_``. + """ + + _prefix = "offload_" + + def initialize(self, pc): + # Check if our PETSc installation is GPU enabled + super().initialize(pc) + self.offload_mat_type = offload_mat_type(pc.comm.rank) + if self.offload_mat_type is not None: + with PETSc.Log.Event("Event: initialize offload"): + A, P = pc.getOperators() + + # Convert matrix to aijcusparse + with PETSc.Log.Event("Event: matrix offload"): + P_cu = P.convert(self.offload_mat_type) # todo + + # Transfer nullspace + P_cu.setNullSpace(P.getNullSpace()) + P_cu.setTransposeNullSpace(P.getTransposeNullSpace()) + P_cu.setNearNullSpace(P.getNearNullSpace()) + + # Update preconditioner with GPU matrix + self.pc.setOperators(A, P_cu) + + # Convert vectors to CUDA, solve and get solution on CPU back + def apply(self, pc, x, y): + if self.offload_mat_type is None: + self.pc.apply(x, y) + else: + with PETSc.Log.Event("Event: apply offload"): # + dm = pc.getDM() + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): + with PETSc.Log.Event("Event: vectors offload"): + y_cu = PETSc.Vec() # begin + y_cu.createCUDAWithArrays(y) + x_cu = PETSc.Vec() + # Passing a vec into another vec doesn't work because original is locked + x_cu.createCUDAWithArrays(x.array_r) + with PETSc.Log.Event("Event: solve"): + self.pc.apply(x_cu, y_cu) + # Calling data to synchronize vector + tmp = y_cu.array_r # noqa: F841 + with PETSc.Log.Event("Event: vectors copy back"): + y.copy(y_cu) # + + def applyTranspose(self, pc, X, Y): + raise NotImplementedError + 
def view(self, pc, viewer=None): + super().view(pc, viewer) + if hasattr(self, "pc"): + viewer.printfASCII("PC to solve on GPU\n") + self.pc.view(viewer) diff --git a/firedrake/utils.py b/firedrake/utils.py index f39f84d478..d1f2bcb260 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -23,6 +23,14 @@ SLATE_SUPPORTS_COMPLEX = False +def device_matrix_type() -> str | None: + _device_mat_type_map = {"cuda": "aijcusparse"} + for device, mat_type in _device_mat_type_map.items(): + if device in petsctools.get_external_packages(): + return mat_type + return None + + def _new_uid(comm): uid = comm.Get_attr(FIREDRAKE_UID) if uid is None: diff --git a/scripts/firedrake-check b/scripts/firedrake-check index deeb73ca6e..f76bd0788b 100644 --- a/scripts/firedrake-check +++ b/scripts/firedrake-check @@ -23,6 +23,8 @@ TESTS = { "tests/firedrake/regression/test_matrix_free.py::test_fieldsplitting[parameters3-cofunc_rhs-variational]", # near nullspace "tests/firedrake/regression/test_nullspace.py::test_near_nullspace", + # GPU offload + "tests/firedrake/offload/test_poisson_offloading_pc.py::test_poisson_offload", ), 2: ( # HDF5/checkpointing diff --git a/scripts/firedrake-configure b/scripts/firedrake-configure index 0c1030808d..53d0e2f307 100755 --- a/scripts/firedrake-configure +++ b/scripts/firedrake-configure @@ -30,6 +30,15 @@ LINUX_APT_AARCH64 = PackageManager.LINUX_APT_AARCH64 MACOS_HOMEBREW_ARM64 = PackageManager.MACOS_HOMEBREW_ARM64 +class GPUArch(enum.Enum): + NO_GPU = "none" + CUDA = "cuda" + + +NO_GPU = GPUArch.NO_GPU +CUDA = GPUArch.CUDA + + class FiredrakeArch(enum.Enum): DEFAULT = "default" COMPLEX = "complex" @@ -40,6 +49,20 @@ ARCH_COMPLEX = FiredrakeArch.COMPLEX SUPPORTED_PETSC_VERSION = "v3.24.5" +# SuperLU_DIST built via PETSc does not support CUDA 13 +SUPPORTED_CUDA_VERSION = "12.9" + + +CUDA_ARCH_MAP = { + "aarch64": "sbsa" +} +# Maps each GPU architecture to the URL of a .deb package that must be installed +# (e.g. with 'dpkg -i') to enable access to the non-OS apt repository 
needed for that build +# NO_GPU maps to an empty string (there is nothing extra to install) +EXTRA_LINUX_APT_PKG_URL = { + NO_GPU: "", + CUDA: f"https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/{CUDA_ARCH_MAP.get(platform.machine(), platform.machine())}/cuda-keyring_1.1-1_all.deb", +} def main(): @@ -77,6 +100,12 @@ Please see https://firedrakeproject.org/install for more information.""" default=ARCH_DEFAULT, help="The target configuration to install.", ) + parser.add_argument( + "--gpu-arch", + choices=[arch.value for arch in GPUArch], + default="none", + help="Target GPU architecture" + ) cmd_group = parser.add_mutually_exclusive_group(required=True) cmd_group.add_argument( "--show-system-packages", @@ -113,6 +142,12 @@ Please see https://firedrakeproject.org/install for more information.""" action="store_true", help="Print out the environment variables that need to be exported to install Firedrake.", ) + cmd_group.add_argument( + "--show-extra-repo-pkg-url", + "--repopkgurl", + action="store_true", + help="Print out the URL of any package required to enable non-OS repo access for this build", + ) args = parser.parse_args() if args.package_manager is not None: @@ -124,13 +159,20 @@ Please see https://firedrakeproject.org/install for more information.""" package_manager = sniff_package_manager() arch = FiredrakeArch(args.arch) + gpu_arch = GPUArch(args.gpu_arch) + if gpu_arch != NO_GPU and package_manager == MACOS_HOMEBREW_ARM64: + raise RuntimeError( + "GPU-compatible PETSc builds are currently only supported " + "on Linux" + ) + if args.show_system_packages: if package_manager is None: raise RuntimeError( "Cannot install Firedrake dependencies without a package manager, " "please install them manually" ) - print(" ".join(SYSTEM_PACKAGES[package_manager, arch]), end="") + print(" ".join(SYSTEM_PACKAGES[package_manager, arch, gpu_arch]), end="") elif args.show_minimal_system_packages: if package_manager is None: raise 
RuntimeError( @@ -139,12 +181,14 @@ Please see https://firedrakeproject.org/install for more information.""" ) print(" ".join(MINIMAL_SYSTEM_PACKAGES[package_manager]), end="") elif args.show_petsc_configure_options: - print(" ".join(PETSC_CONFIGURE_OPTIONS[package_manager, arch]), end="") + print(" ".join(PETSC_CONFIGURE_OPTIONS[package_manager, arch, gpu_arch]), end="") elif args.show_petsc_version: print(SUPPORTED_PETSC_VERSION, end="") + elif args.show_extra_repo_pkg_url: + print(EXTRA_LINUX_APT_PKG_URL[gpu_arch], end="") else: assert args.show_env - print(" ".join(ENVIRONMENT_VARS[package_manager, arch]), end="") + print(" ".join(ENVIRONMENT_VARS[package_manager, arch, gpu_arch]), end="") def sniff_package_manager() -> Optional[PackageManager]: @@ -199,7 +243,7 @@ BASE_LINUX_APT_PACKAGES = ( MINIMAL_LINUX_APT_PACKAGES + ("bison", "cmake", "libopenblas-dev", "libopenmpi-dev") ) -PETSC_EXTRAS_LINUX_APT_PACKAGES = ( +PETSC_EXTRAS_COMMON_APT_PACKAGES = ( "libfftw3-dev", "libfftw3-mpi-dev", "libhwloc-dev", @@ -210,13 +254,33 @@ PETSC_EXTRAS_LINUX_APT_PACKAGES = ( "libpnetcdf-dev", "libptscotch-dev", "libscalapack-openmpi-dev", +) + +PETSC_EXTRAS_LINUX_APT_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( "libsuitesparse-dev", "libsuperlu-dev", "libsuperlu-dist-dev", ) +cuda_ver_str = SUPPORTED_CUDA_VERSION.replace(".", "-") + +PETSC_EXTRAS_LINUX_APT_CUDA_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( + f"cuda-compat-{cuda_ver_str}", + f"cuda-nvtx-{cuda_ver_str}", + f"cuda-cudart-dev-{cuda_ver_str}", + f"cuda-command-line-tools-{cuda_ver_str}", + f"cuda-minimal-build-{cuda_ver_str}", + f"cuda-libraries-dev-{cuda_ver_str}", + f"cuda-nvml-dev-{cuda_ver_str}", + f"libnpp-dev-{cuda_ver_str}", + f"libcusparse-dev-{cuda_ver_str}", + f"libcublas-dev-{cuda_ver_str}", +) + LINUX_APT_PACKAGES = BASE_LINUX_APT_PACKAGES + PETSC_EXTRAS_LINUX_APT_PACKAGES +LINUX_APT_PACKAGES_CUDA = BASE_LINUX_APT_PACKAGES + PETSC_EXTRAS_LINUX_APT_CUDA_PACKAGES + MINIMAL_MACOS_HOMEBREW_PACKAGES = ( 
"autoconf", "automake", @@ -255,12 +319,14 @@ MINIMAL_SYSTEM_PACKAGES = { } SYSTEM_PACKAGES = { - (LINUX_APT_X86_64, ARCH_DEFAULT): LINUX_APT_PACKAGES, - (LINUX_APT_X86_64, ARCH_COMPLEX): LINUX_APT_PACKAGES, - (LINUX_APT_AARCH64, ARCH_DEFAULT): LINUX_APT_PACKAGES, - (LINUX_APT_AARCH64, ARCH_COMPLEX): LINUX_APT_PACKAGES, - (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT): MACOS_HOMEBREW_PACKAGES, - (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX): MACOS_HOMEBREW_PACKAGES, + (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES, + (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU): MACOS_HOMEBREW_PACKAGES, + (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU): MACOS_HOMEBREW_PACKAGES, + (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, + (LINUX_APT_AARCH64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, } COMMON_PETSC_CONFIGURE_OPTIONS = ( @@ -271,11 +337,18 @@ COMMON_PETSC_CONFIGURE_OPTIONS = ( "--with-strict-petscerrorcode", ) + +class PetscPackageAction(enum.IntEnum): + PETSC_AUTODETECT = enum.auto() + PETSC_DOWNLOAD = enum.auto() + + # Placeholder value to use when we want PETSc to autodetect the package -PETSC_AUTODETECT = 333 +PETSC_AUTODETECT = PetscPackageAction.PETSC_AUTODETECT # Placeholder value to use when we want PETSc to download the package -PETSC_DOWNLOAD = 666 +PETSC_DOWNLOAD = PetscPackageAction.PETSC_DOWNLOAD + # For each package and architecture there are a number of different types of input: # 1. PETSC_AUTODETECT - PETSc will be able to find the package itself @@ -285,7 +358,10 @@ PETSC_DOWNLOAD = 666 # 'lib' subdirectories) # 4. tuple[str, tuple[str, ...]] - a 2-tuple consisting of the includes directory # (location of the header files) and a collection of library files that PETSc needs. 
-PETSC_EXTERNAL_PACKAGE_SPECS = { +PetscSpecValueType = PetscPackageAction | str | tuple[str | None, tuple[str, ...]] +PetscSpecsDictType = dict[str, dict[PackageManager, PetscSpecValueType]] + +PETSC_EXTERNAL_PACKAGE_SPECS_COMMON: PetscSpecsDictType = { "bison": { LINUX_APT_X86_64: PETSC_AUTODETECT, LINUX_APT_AARCH64: PETSC_AUTODETECT, @@ -341,16 +417,6 @@ PETSC_EXTERNAL_PACKAGE_SPECS = { LINUX_APT_AARCH64: (None, ("-lscalapack-openmpi",)), MACOS_HOMEBREW_ARM64: "/opt/homebrew", }, - "suitesparse": { - LINUX_APT_X86_64: PETSC_AUTODETECT, - LINUX_APT_AARCH64: PETSC_AUTODETECT, - MACOS_HOMEBREW_ARM64: "/opt/homebrew", - }, - "superlu_dist": { - LINUX_APT_X86_64: PETSC_AUTODETECT, - LINUX_APT_AARCH64: PETSC_AUTODETECT, - MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, - }, "zlib": { LINUX_APT_X86_64: PETSC_AUTODETECT, LINUX_APT_AARCH64: PETSC_AUTODETECT, @@ -358,6 +424,43 @@ PETSC_EXTERNAL_PACKAGE_SPECS = { }, } +PETSC_EXTERNAL_PACKAGE_SPECS: PetscSpecsDictType = ( + PETSC_EXTERNAL_PACKAGE_SPECS_COMMON + | { + "suitesparse": { + LINUX_APT_X86_64: PETSC_AUTODETECT, + LINUX_APT_AARCH64: PETSC_AUTODETECT, + MACOS_HOMEBREW_ARM64: "/opt/homebrew", + }, + "superlu_dist": { + LINUX_APT_X86_64: PETSC_AUTODETECT, + LINUX_APT_AARCH64: PETSC_AUTODETECT, + MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, + }, + } +) + +PETSC_EXTERNAL_PACKAGE_SPECS_CUDA: PetscSpecsDictType = ( + PETSC_EXTERNAL_PACKAGE_SPECS_COMMON + | { + "suitesparse": { + LINUX_APT_X86_64: PETSC_DOWNLOAD, + LINUX_APT_AARCH64: PETSC_DOWNLOAD, + MACOS_HOMEBREW_ARM64: "/opt/homebrew", + }, + "superlu_dist": { + LINUX_APT_X86_64: PETSC_DOWNLOAD, + LINUX_APT_AARCH64: PETSC_DOWNLOAD, + MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, + }, + "umpire": { + LINUX_APT_X86_64: PETSC_DOWNLOAD, + LINUX_APT_AARCH64: PETSC_DOWNLOAD, + MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, + }, + } +) + COMMON_PETSC_EXTERNAL_PACKAGES = ( "bison", "fftw", @@ -374,10 +477,13 @@ COMMON_PETSC_EXTERNAL_PACKAGES = ( "zlib", ) +PETSC_EXTRA_EXTERNAL_PACKAGES_CUDA = ("umpire",) + def 
prepare_external_package_configure_options( external_packages: Sequence[str], - package_manager: Optional[PackageManager], + package_manager: PackageManager | None = None, + gpu_arch: GPUArch = NO_GPU, ) -> tuple[str, ...]: configure_options = [] for external_package in external_packages: @@ -385,7 +491,10 @@ def prepare_external_package_configure_options( # Don't know anything about the system, download everything package_spec = PETSC_DOWNLOAD else: - package_spec = PETSC_EXTERNAL_PACKAGE_SPECS[external_package][package_manager] + if gpu_arch == NO_GPU: + package_spec = PETSC_EXTERNAL_PACKAGE_SPECS[external_package][package_manager] + elif gpu_arch == CUDA: + package_spec = PETSC_EXTERNAL_PACKAGE_SPECS_CUDA[external_package][package_manager] if package_spec == PETSC_AUTODETECT: # PETSc will find the package for us @@ -408,12 +517,20 @@ def prepare_external_package_configure_options( return tuple(configure_options) +def get_petsc_arch(arch: FiredrakeArch, gpu_arch: GPUArch) -> str: + arr = ["arch", "firedrake", arch.value] + if gpu_arch != NO_GPU: + arr.append(gpu_arch.value) + return "-".join(arr) + + def prepare_configure_options( package_manager: Optional[PackageManager], arch: FiredrakeArch, + gpu_arch: GPUArch, ) -> tuple[str, ...]: configure_options = list(COMMON_PETSC_CONFIGURE_OPTIONS) - configure_options.append(f"PETSC_ARCH=arch-firedrake-{arch.value}") + configure_options.append(f"PETSC_ARCH={get_petsc_arch(arch, gpu_arch)}") # include/link flags if package_manager in (LINUX_APT_X86_64, LINUX_APT_AARCH64): @@ -426,10 +543,14 @@ def prepare_configure_options( includes = ( f"{incdir}/hdf5/openmpi", f"{incdir}/scotch", - f"{incdir}/superlu", - f"{incdir}/superlu-dist", ) + if gpu_arch == NO_GPU: + includes = includes + ( + f"{incdir}/superlu", + f"{incdir}/superlu-dist", + ) + libraries = ( f"{libdir}/hdf5/openmpi", ) @@ -458,39 +579,59 @@ def prepare_configure_options( if arch == ARCH_COMPLEX: configure_options.append("--with-scalar-type=complex") + if 
gpu_arch == CUDA: + configure_options.extend( + ["--with-cuda=1", "--with-openmp=1", "--with-cxx-dialect=c++17"] + ) + external_packages = list(COMMON_PETSC_EXTERNAL_PACKAGES) if arch != ARCH_COMPLEX: external_packages.append("hypre") + if gpu_arch == CUDA: + external_packages.extend(PETSC_EXTRA_EXTERNAL_PACKAGES_CUDA) configure_options.extend( - prepare_external_package_configure_options(external_packages, package_manager) + prepare_external_package_configure_options( + external_packages, package_manager, gpu_arch + ) ) return tuple(configure_options) +PETSC_VALID_BUILD_COMBINATIONS = ( + (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU), + (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU), + (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU), + (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU), + (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU), + (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU), + (None, ARCH_DEFAULT, NO_GPU), + (None, ARCH_COMPLEX, NO_GPU), + (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA), + (LINUX_APT_AARCH64, ARCH_DEFAULT, CUDA), + (None, ARCH_DEFAULT, CUDA), +) + + PETSC_CONFIGURE_OPTIONS = { - (package_manager, arch): prepare_configure_options(package_manager, arch) - for (package_manager, arch) in ( - (LINUX_APT_X86_64, ARCH_DEFAULT), - (LINUX_APT_X86_64, ARCH_COMPLEX), - (LINUX_APT_AARCH64, ARCH_DEFAULT), - (LINUX_APT_AARCH64, ARCH_COMPLEX), - (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT), - (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX), - (None, ARCH_DEFAULT), - (None, ARCH_COMPLEX), + (package_manager, arch, gpu_arch): prepare_configure_options( + package_manager, arch, gpu_arch ) + for (package_manager, arch, gpu_arch) in PETSC_VALID_BUILD_COMBINATIONS } def prepare_environment_vars( package_manager: Optional[PackageManager], arch: FiredrakeArch, + gpu_arch: GPUArch, ) -> tuple[str, ...]: vars = { "PETSC_DIR": f"{os.getcwd()}/petsc", - "PETSC_ARCH": f"arch-firedrake-{arch.value}", + "PETSC_ARCH": get_petsc_arch(arch, gpu_arch), "HDF5_MPI": "ON", } + if gpu_arch == CUDA: + vars["PATH"] = 
f"/usr/local/cuda/bin:{os.environ.get('PATH', '')}" if package_manager == MACOS_HOMEBREW_ARM64: # On macOS h5py cannot find the HDF5 library without help @@ -503,17 +644,10 @@ def prepare_environment_vars( ENVIRONMENT_VARS = { - (package_manager, arch): prepare_environment_vars(package_manager, arch) - for (package_manager, arch) in ( - (LINUX_APT_X86_64, ARCH_DEFAULT), - (LINUX_APT_X86_64, ARCH_COMPLEX), - (LINUX_APT_AARCH64, ARCH_DEFAULT), - (LINUX_APT_AARCH64, ARCH_COMPLEX), - (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT), - (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX), - (None, ARCH_DEFAULT), - (None, ARCH_COMPLEX), + (package_manager, arch, gpu_arch): prepare_environment_vars( + package_manager, arch, gpu_arch ) + for (package_manager, arch, gpu_arch) in PETSC_VALID_BUILD_COMBINATIONS } diff --git a/setup.py b/setup.py index 1d5b588355..692b3bafa4 100644 --- a/setup.py +++ b/setup.py @@ -239,6 +239,7 @@ def extensions(): "tests/firedrake/regression/test_dg_advection.py", "tests/firedrake/regression/test_interpolate_cross_mesh.py", "tests/firedrake/output/test_io_function.py", + "tests/firedrake/offload/test_poisson_offloading_pc.py" ) diff --git a/tests/firedrake/conftest.py b/tests/firedrake/conftest.py index 1fd9344dd6..9c82dfc884 100644 --- a/tests/firedrake/conftest.py +++ b/tests/firedrake/conftest.py @@ -166,10 +166,14 @@ def pytest_configure(config): "markers", "skipnetgen: mark as skipped if netgen and ngsPETSc is not installed" ) + config.addinivalue_line( + "markers", + "skipnogpu: mark as skipped when GPU hardware is unavailable" + ) def pytest_collection_modifyitems(session, config, items): - from firedrake.utils import complex_mode, SLATE_SUPPORTS_COMPLEX + from firedrake.utils import complex_mode, device_matrix_type, SLATE_SUPPORTS_COMPLEX for item in items: if complex_mode: @@ -181,6 +185,10 @@ def pytest_collection_modifyitems(session, config, items): if item.get_closest_marker("skipreal") is not None: item.add_marker(pytest.mark.skip(reason="Test makes no 
sense unless in complex mode")) + if device_matrix_type() is None: + if item.get_closest_marker("skipnogpu") is not None: + item.add_marker(pytest.mark.skip(reason="Test requires GPU hardware to run.")) + for dep, marker, reason in dependency_skip_markers_and_reasons: if item.get_closest_marker(marker) is not None and _skip_test_dependency(dep): item.add_marker(pytest.mark.skip(reason)) diff --git a/tests/firedrake/offload/test_poisson_offloading_pc.py b/tests/firedrake/offload/test_poisson_offloading_pc.py new file mode 100644 index 0000000000..0c9da03e87 --- /dev/null +++ b/tests/firedrake/offload/test_poisson_offloading_pc.py @@ -0,0 +1,64 @@ +from firedrake import * +import pytest + + +# TODO: add marker for cuda pytests and something to check if cuda memory was really used +@pytest.mark.skipnogpu +@pytest.mark.parametrize( + "ksp_type, pc_type", [("cg", "sor"), ("cg", "gamg"), ("preonly", "lu")] +) +def test_poisson_offload(ksp_type, pc_type): + + # Different tests for poisson: cg and pctype sor, --ksp_type=cg --pc_type=gamg + print(f"Using ksp_type = {ksp_type}, and pc_type = {pc_type}.", flush=True) + + nested_parameters = { + "pc_type": "ksp", + "ksp": { + "ksp_type": ksp_type, + "ksp_max_it": 50, + "ksp_view": None, + "ksp_rtol": "1e-10", + "ksp_monitor": None, + "pc_type": pc_type, + }, + } + parameters = { + "ksp_type": "preonly", + "pc_type": "python", + "pc_python_type": "firedrake.OffloadPC", + "offload": nested_parameters, + } + + mesh = UnitSquareMesh(10, 10) + V = FunctionSpace(mesh, "CG", 1) + u = TrialFunction(V) + v = TestFunction(V) + + f = Function(V) + x, y = SpatialCoordinate(mesh) + f.interpolate(2 * pi**2 * sin(pi * x) * sin(pi * y)) + + # Equations + L = inner(grad(u), grad(v)) * dx + + # Dirichlet boundary on all sides to 0 + bcs = DirichletBC(V, 0, "on_boundary") + + # Exact solution. NOTE(review): sol is never assigned and f is unused, so R is presumably zero - confirm + sol = Function(V) + R = action(L, sol) + + # Solution function + u_f = Function(V) + + problem = LinearVariationalProblem(L, R, u_f, bcs=bcs) + solver = 
LinearVariationalSolver(problem, solver_parameters=parameters) + solver.solve() + error = errornorm(u_f, sol) + print(f"Error norm = {error}", flush=True) + assert error < 1.0e-9 + + +if __name__ == "__main__": + test_poisson_offload("cg", "gamg")